Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'sound-5.19-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai...
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "amdgpu_ring.h"
33 #include "vi.h"
34 #include "vi_structs.h"
35 #include "vid.h"
36 #include "amdgpu_ucode.h"
37 #include "amdgpu_atombios.h"
38 #include "atombios_i2c.h"
39 #include "clearstate_vi.h"
40
41 #include "gmc/gmc_8_2_d.h"
42 #include "gmc/gmc_8_2_sh_mask.h"
43
44 #include "oss/oss_3_0_d.h"
45 #include "oss/oss_3_0_sh_mask.h"
46
47 #include "bif/bif_5_0_d.h"
48 #include "bif/bif_5_0_sh_mask.h"
49 #include "gca/gfx_8_0_d.h"
50 #include "gca/gfx_8_0_enum.h"
51 #include "gca/gfx_8_0_sh_mask.h"
52
53 #include "dce/dce_10_0_d.h"
54 #include "dce/dce_10_0_sh_mask.h"
55
56 #include "smu/smu_7_1_3_d.h"
57
58 #include "ivsrcid/ivsrcid_vislands30.h"
59
/*
 * Number of GFX rings and the MEC HPD size for GFX v8.
 * NOTE(review): the unit of GFX8_MEC_HPD_SIZE is not visible here
 * (presumably bytes) -- confirm at the use site.
 */
60 #define GFX8_NUM_GFX_RINGS     1
61 #define GFX8_MEC_HPD_SIZE 4096
62
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG constants.
 * Topaz and Carrizo share the same value.  The same constants also appear
 * in the mmGB_ADDR_CONFIG rows of the *_golden_common_all tables in this
 * file (0x22010001 for iceland/cz, 0x22011002 for polaris11, 0x22011003
 * for tonga).
 */
63 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67
/*
 * Field-placement helpers: each macro left-shifts its argument by the
 * *__SHIFT constant of the named GB_TILE_MODE0 / GB_MACROTILE_MODE0 field.
 * No masking is applied, so the argument must already fit the field width.
 */
68 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5
 * in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 */
78 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
79 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
80 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
82 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
83 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
84
85 /* BPM SERDES command codes: 1 = set, 0 = clear. */
86 #define SET_BPM_SERDES_CMD    1
87 #define CLE_BPM_SERDES_CMD    0
88
89 /*
 * BPM register addresses.  Enumerators count up from 0, so
 * BPM_REG_FGCG_MAX (== 5) is the number of real entries and is not itself
 * a register address.
 */
90 enum {
91         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
92         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
93         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
94         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
95         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
96         BPM_REG_FGCG_MAX            /* count sentinel, keep last */
97 };
98
/*
 * NOTE(review): presumably the length of the RLC "format direct" register
 * list; the unit (entries vs. dwords) is not visible here -- confirm at
 * the use site.
 */
99 #define RLC_FormatDirectRegListLength        14
100
/*
 * Firmware images declared by this file, grouped per ASIC.
 * Each ASIC lists ce, pfp, me, mec and rlc images; all except stoney and
 * topaz also list a mec2 image.  The polaris10/11/12 groups additionally
 * list a "_2" variant of every image except rlc.
 */
101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145
146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157
158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169
170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176
/*
 * GDS register offsets per VMID: 16 entries, indexed by VMID 0..15.
 * Each entry lists, in order, that VMID's GDS BASE, GDS SIZE, GWS and OA
 * registers.
 */
177 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 {
179         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 };
196
/*
 * Golden register settings table for Tonga (a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
197 static const u32 golden_settings_tonga_a11[] =
198 {
199         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202         mmGB_GPU_ID, 0x0000000f, 0x00000000,
203         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 };
216
/*
 * Common golden register table for Tonga.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * The mmGB_ADDR_CONFIG row carries the same value as
 * TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
217 static const u32 tonga_golden_common_all[] =
218 {
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 };
228
/*
 * MGCG/CGCG clock-gating init table for Tonga.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.  Layout: the RLC override row, an mmGRBM_GFX_INDEX
 * row, the CGTT clock-control rows, a second mmGRBM_GFX_INDEX row, the
 * per-CU CGTS rows for CU0 through CU7, then the trailing SM/CP/RLC rows.
 * NOTE(review): the constants look like (mask, value) pairs, and row order
 * is presumably significant -- confirm against the consumer, which is not
 * visible here.
 */
229 static const u32 tonga_mgcg_cgcg_init[] =
230 {
231         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 };
307
/*
 * Golden register settings table for VegaM (a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
308 static const u32 golden_settings_vegam_a11[] =
309 {
310         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320         mmSQ_CONFIG, 0x07f80000, 0x01180000,
321         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 };
328
/*
 * Common golden register table for VegaM.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * Unlike the tonga/polaris10/fiji common tables it has no
 * mmPA_SC_RASTER_CONFIG rows.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
329 static const u32 vegam_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 };
338
/*
 * Golden register settings table for Polaris11 (a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
339 static const u32 golden_settings_polaris11_a11[] =
340 {
341         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351         mmSQ_CONFIG, 0x07f80000, 0x01180000,
352         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
/*
 * Common golden register table for Polaris11.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * The mmGB_ADDR_CONFIG row carries the same value as
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
360 static const u32 polaris11_golden_common_all[] =
361 {
362         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 };
369
/*
 * Golden register settings table for Polaris10 (a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 * NOTE(review): unlike the vegam and polaris11 a11 tables, this one has an
 * mmTCP_CHAN_STEER_HI row but no mmTCP_CHAN_STEER_LO row -- confirm that
 * this is intentional.
 */
370 static const u32 golden_settings_polaris10_a11[] =
371 {
372         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383         mmSQ_CONFIG, 0x07f80000, 0x07180000,
384         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 };
390
/*
 * Common golden register table for Polaris10.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * The rows and values match tonga_golden_common_all.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
391 static const u32 polaris10_golden_common_all[] =
392 {
393         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 };
402
/*
 * Common golden register table for Fiji.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.  After the rows shared with the other common
 * tables it repeats the mmGRBM_GFX_INDEX row and adds an
 * mmSPI_CONFIG_CNTL_1 row.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
403 static const u32 fiji_golden_common_all[] =
404 {
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 };
416
/*
 * Golden register settings table for Fiji (a10).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
417 static const u32 golden_settings_fiji_a10[] =
418 {
419         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 };
431
/*
 * MGCG/CGCG clock-gating init table for Fiji.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.  Layout: the RLC override row, an mmGRBM_GFX_INDEX
 * row, the CGTT clock-control rows, a second mmGRBM_GFX_INDEX row, then
 * the trailing SM/CP/RLC rows.  Unlike the tonga and iceland tables it has
 * no per-CU mmCGTS_CU* rows.
 * NOTE(review): the constants look like (mask, value) pairs, and row order
 * is presumably significant -- confirm against the consumer, which is not
 * visible here.
 */
432 static const u32 fiji_mgcg_cgcg_init[] =
433 {
434         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 };
470
/*
 * Golden register settings table for Iceland (a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
471 static const u32 golden_settings_iceland_a11[] =
472 {
473         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476         mmGB_GPU_ID, 0x0000000f, 0x00000000,
477         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 };
490
/*
 * Common golden register table for Iceland.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * The mmGB_ADDR_CONFIG row carries the same value as
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
491 static const u32 iceland_golden_common_all[] =
492 {
493         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 };
502
/*
 * MGCG/CGCG clock-gating init table for Iceland.
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.  Layout: the RLC override row, an mmGRBM_GFX_INDEX
 * row, the CGTT clock-control rows, a second mmGRBM_GFX_INDEX row, the
 * per-CU CGTS rows for CU0 through CU5 only, then the trailing SM/CP/RLC
 * rows.  There is no mmCP_MEM_SLP_CNTL row, unlike the tonga and fiji
 * tables.
 * NOTE(review): the constants look like (mask, value) pairs, and row order
 * is presumably significant -- confirm against the consumer, which is not
 * visible here.
 */
503 static const u32 iceland_mgcg_cgcg_init[] =
504 {
505         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 };
570
/*
 * Golden register settings table for Carrizo ("cz", a11).
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants.
 * NOTE(review): the two constants look like a (mask, value) pair applied
 * by the code that consumes this table -- confirm there; the consumer is
 * not visible here.
 */
571 static const u32 cz_golden_settings_a11[] =
572 {
573         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575         mmGB_GPU_ID, 0x0000000f, 0x00000000,
576         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 };
586
/*
 * Common golden register table for Carrizo ("cz").
 * Flat u32 list, three values per row: an mm* register offset followed by
 * two 32-bit constants (the middle constant is 0xffffffff in every row).
 * The mmGB_ADDR_CONFIG row carries the same value as
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like (mask, value) pairs -- confirm
 * against the consumer, which is not visible here.
 */
587 static const u32 cz_golden_common_all[] =
588 {
589         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 };
598
599 static const u32 cz_mgcg_cgcg_init[] =
600 {
601         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 };
677
678 static const u32 stoney_golden_settings_a11[] =
679 {
680         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681         mmGB_GPU_ID, 0x0000000f, 0x00000000,
682         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 };
691
692 static const u32 stoney_golden_common_all[] =
693 {
694         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 };
703
704 static const u32 stoney_mgcg_cgcg_init[] =
705 {
706         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 };
712
713
/*
 * Human-readable descriptions of the SQ EDC error sources.  The array index
 * is spelled out explicitly so the position of each entry is obvious.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
723
724 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732
/*
 * ACLK_DIVIDER field of CG_ACLK_CNTL: bits 6:0.  Used by the Polaris10 case
 * of gfx_v8_0_init_golden_registers() to reprogram the divider.
 */
#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK		0x7fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT	0x0L
735
736 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 {
738         uint32_t data;
739
740         switch (adev->asic_type) {
741         case CHIP_TOPAZ:
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_mgcg_cgcg_init,
744                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
745                 amdgpu_device_program_register_sequence(adev,
746                                                         golden_settings_iceland_a11,
747                                                         ARRAY_SIZE(golden_settings_iceland_a11));
748                 amdgpu_device_program_register_sequence(adev,
749                                                         iceland_golden_common_all,
750                                                         ARRAY_SIZE(iceland_golden_common_all));
751                 break;
752         case CHIP_FIJI:
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_mgcg_cgcg_init,
755                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
756                 amdgpu_device_program_register_sequence(adev,
757                                                         golden_settings_fiji_a10,
758                                                         ARRAY_SIZE(golden_settings_fiji_a10));
759                 amdgpu_device_program_register_sequence(adev,
760                                                         fiji_golden_common_all,
761                                                         ARRAY_SIZE(fiji_golden_common_all));
762                 break;
763
764         case CHIP_TONGA:
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_mgcg_cgcg_init,
767                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
768                 amdgpu_device_program_register_sequence(adev,
769                                                         golden_settings_tonga_a11,
770                                                         ARRAY_SIZE(golden_settings_tonga_a11));
771                 amdgpu_device_program_register_sequence(adev,
772                                                         tonga_golden_common_all,
773                                                         ARRAY_SIZE(tonga_golden_common_all));
774                 break;
775         case CHIP_VEGAM:
776                 amdgpu_device_program_register_sequence(adev,
777                                                         golden_settings_vegam_a11,
778                                                         ARRAY_SIZE(golden_settings_vegam_a11));
779                 amdgpu_device_program_register_sequence(adev,
780                                                         vegam_golden_common_all,
781                                                         ARRAY_SIZE(vegam_golden_common_all));
782                 break;
783         case CHIP_POLARIS11:
784         case CHIP_POLARIS12:
785                 amdgpu_device_program_register_sequence(adev,
786                                                         golden_settings_polaris11_a11,
787                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
788                 amdgpu_device_program_register_sequence(adev,
789                                                         polaris11_golden_common_all,
790                                                         ARRAY_SIZE(polaris11_golden_common_all));
791                 break;
792         case CHIP_POLARIS10:
793                 amdgpu_device_program_register_sequence(adev,
794                                                         golden_settings_polaris10_a11,
795                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
796                 amdgpu_device_program_register_sequence(adev,
797                                                         polaris10_golden_common_all,
798                                                         ARRAY_SIZE(polaris10_golden_common_all));
799                 data = RREG32_SMC(ixCG_ACLK_CNTL);
800                 data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801                 data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802                 WREG32_SMC(ixCG_ACLK_CNTL, data);
803                 if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809                 }
810                 break;
811         case CHIP_CARRIZO:
812                 amdgpu_device_program_register_sequence(adev,
813                                                         cz_mgcg_cgcg_init,
814                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
815                 amdgpu_device_program_register_sequence(adev,
816                                                         cz_golden_settings_a11,
817                                                         ARRAY_SIZE(cz_golden_settings_a11));
818                 amdgpu_device_program_register_sequence(adev,
819                                                         cz_golden_common_all,
820                                                         ARRAY_SIZE(cz_golden_common_all));
821                 break;
822         case CHIP_STONEY:
823                 amdgpu_device_program_register_sequence(adev,
824                                                         stoney_mgcg_cgcg_init,
825                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
826                 amdgpu_device_program_register_sequence(adev,
827                                                         stoney_golden_settings_a11,
828                                                         ARRAY_SIZE(stoney_golden_settings_a11));
829                 amdgpu_device_program_register_sequence(adev,
830                                                         stoney_golden_common_all,
831                                                         ARRAY_SIZE(stoney_golden_common_all));
832                 break;
833         default:
834                 break;
835         }
836 }
837
838 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 {
840         struct amdgpu_device *adev = ring->adev;
841         uint32_t tmp = 0;
842         unsigned i;
843         int r;
844
845         WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
846         r = amdgpu_ring_alloc(ring, 3);
847         if (r)
848                 return r;
849
850         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851         amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
852         amdgpu_ring_write(ring, 0xDEADBEEF);
853         amdgpu_ring_commit(ring);
854
855         for (i = 0; i < adev->usec_timeout; i++) {
856                 tmp = RREG32(mmSCRATCH_REG0);
857                 if (tmp == 0xDEADBEEF)
858                         break;
859                 udelay(1);
860         }
861
862         if (i >= adev->usec_timeout)
863                 r = -ETIMEDOUT;
864
865         return r;
866 }
867
868 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
869 {
870         struct amdgpu_device *adev = ring->adev;
871         struct amdgpu_ib ib;
872         struct dma_fence *f = NULL;
873
874         unsigned int index;
875         uint64_t gpu_addr;
876         uint32_t tmp;
877         long r;
878
879         r = amdgpu_device_wb_get(adev, &index);
880         if (r)
881                 return r;
882
883         gpu_addr = adev->wb.gpu_addr + (index * 4);
884         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
885         memset(&ib, 0, sizeof(ib));
886         r = amdgpu_ib_get(adev, NULL, 16,
887                                         AMDGPU_IB_POOL_DIRECT, &ib);
888         if (r)
889                 goto err1;
890
891         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893         ib.ptr[2] = lower_32_bits(gpu_addr);
894         ib.ptr[3] = upper_32_bits(gpu_addr);
895         ib.ptr[4] = 0xDEADBEEF;
896         ib.length_dw = 5;
897
898         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899         if (r)
900                 goto err2;
901
902         r = dma_fence_wait_timeout(f, false, timeout);
903         if (r == 0) {
904                 r = -ETIMEDOUT;
905                 goto err2;
906         } else if (r < 0) {
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF)
912                 r = 0;
913         else
914                 r = -EINVAL;
915
916 err2:
917         amdgpu_ib_free(adev, &ib, NULL);
918         dma_fence_put(f);
919 err1:
920         amdgpu_device_wb_free(adev, index);
921         return r;
922 }
923
924
925 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 {
927         release_firmware(adev->gfx.pfp_fw);
928         adev->gfx.pfp_fw = NULL;
929         release_firmware(adev->gfx.me_fw);
930         adev->gfx.me_fw = NULL;
931         release_firmware(adev->gfx.ce_fw);
932         adev->gfx.ce_fw = NULL;
933         release_firmware(adev->gfx.rlc_fw);
934         adev->gfx.rlc_fw = NULL;
935         release_firmware(adev->gfx.mec_fw);
936         adev->gfx.mec_fw = NULL;
937         if ((adev->asic_type != CHIP_STONEY) &&
938             (adev->asic_type != CHIP_TOPAZ))
939                 release_firmware(adev->gfx.mec2_fw);
940         adev->gfx.mec2_fw = NULL;
941
942         kfree(adev->gfx.rlc.register_list_format);
943 }
944
945 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
946 {
947         const char *chip_name;
948         char fw_name[30];
949         int err;
950         struct amdgpu_firmware_info *info = NULL;
951         const struct common_firmware_header *header = NULL;
952         const struct gfx_firmware_header_v1_0 *cp_hdr;
953         const struct rlc_firmware_header_v2_0 *rlc_hdr;
954         unsigned int *tmp = NULL, i;
955
956         DRM_DEBUG("\n");
957
958         switch (adev->asic_type) {
959         case CHIP_TOPAZ:
960                 chip_name = "topaz";
961                 break;
962         case CHIP_TONGA:
963                 chip_name = "tonga";
964                 break;
965         case CHIP_CARRIZO:
966                 chip_name = "carrizo";
967                 break;
968         case CHIP_FIJI:
969                 chip_name = "fiji";
970                 break;
971         case CHIP_STONEY:
972                 chip_name = "stoney";
973                 break;
974         case CHIP_POLARIS10:
975                 chip_name = "polaris10";
976                 break;
977         case CHIP_POLARIS11:
978                 chip_name = "polaris11";
979                 break;
980         case CHIP_POLARIS12:
981                 chip_name = "polaris12";
982                 break;
983         case CHIP_VEGAM:
984                 chip_name = "vegam";
985                 break;
986         default:
987                 BUG();
988         }
989
990         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
991                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
992                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
993                 if (err == -ENOENT) {
994                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 }
997         } else {
998                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000         }
1001         if (err)
1002                 goto out;
1003         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004         if (err)
1005                 goto out;
1006         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013                 if (err == -ENOENT) {
1014                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 }
1017         } else {
1018                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020         }
1021         if (err)
1022                 goto out;
1023         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024         if (err)
1025                 goto out;
1026         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034                 if (err == -ENOENT) {
1035                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 }
1038         } else {
1039                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041         }
1042         if (err)
1043                 goto out;
1044         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045         if (err)
1046                 goto out;
1047         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051         /*
1052          * Support for MCBP/Virtualization in combination with chained IBs is
1053          * formal released on feature version #46
1054          */
1055         if (adev->gfx.ce_feature_version >= 46 &&
1056             adev->gfx.pfp_feature_version >= 46) {
1057                 adev->virt.chained_ib_support = true;
1058                 DRM_INFO("Chained IB support enabled!\n");
1059         } else
1060                 adev->virt.chained_ib_support = false;
1061
1062         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064         if (err)
1065                 goto out;
1066         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071         adev->gfx.rlc.save_and_restore_offset =
1072                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073         adev->gfx.rlc.clear_state_descriptor_offset =
1074                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075         adev->gfx.rlc.avail_scratch_ram_locations =
1076                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077         adev->gfx.rlc.reg_restore_list_size =
1078                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079         adev->gfx.rlc.reg_list_format_start =
1080                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1081         adev->gfx.rlc.reg_list_format_separate_start =
1082                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083         adev->gfx.rlc.starting_offsets_start =
1084                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1085         adev->gfx.rlc.reg_list_format_size_bytes =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087         adev->gfx.rlc.reg_list_size_bytes =
1088                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090         adev->gfx.rlc.register_list_format =
1091                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094         if (!adev->gfx.rlc.register_list_format) {
1095                 err = -ENOMEM;
1096                 goto out;
1097         }
1098
1099         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103
1104         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114                 if (err == -ENOENT) {
1115                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 }
1118         } else {
1119                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121         }
1122         if (err)
1123                 goto out;
1124         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125         if (err)
1126                 goto out;
1127         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131         if ((adev->asic_type != CHIP_STONEY) &&
1132             (adev->asic_type != CHIP_TOPAZ)) {
1133                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136                         if (err == -ENOENT) {
1137                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         }
1140                 } else {
1141                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                 }
1144                 if (!err) {
1145                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146                         if (err)
1147                                 goto out;
1148                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149                                 adev->gfx.mec2_fw->data;
1150                         adev->gfx.mec2_fw_version =
1151                                 le32_to_cpu(cp_hdr->header.ucode_version);
1152                         adev->gfx.mec2_feature_version =
1153                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1154                 } else {
1155                         err = 0;
1156                         adev->gfx.mec2_fw = NULL;
1157                 }
1158         }
1159
1160         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162         info->fw = adev->gfx.pfp_fw;
1163         header = (const struct common_firmware_header *)info->fw->data;
1164         adev->firmware.fw_size +=
1165                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169         info->fw = adev->gfx.me_fw;
1170         header = (const struct common_firmware_header *)info->fw->data;
1171         adev->firmware.fw_size +=
1172                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176         info->fw = adev->gfx.ce_fw;
1177         header = (const struct common_firmware_header *)info->fw->data;
1178         adev->firmware.fw_size +=
1179                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183         info->fw = adev->gfx.rlc_fw;
1184         header = (const struct common_firmware_header *)info->fw->data;
1185         adev->firmware.fw_size +=
1186                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190         info->fw = adev->gfx.mec_fw;
1191         header = (const struct common_firmware_header *)info->fw->data;
1192         adev->firmware.fw_size +=
1193                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195         /* we need account JT in */
1196         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199
1200         if (amdgpu_sriov_vf(adev)) {
1201                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203                 info->fw = adev->gfx.mec_fw;
1204                 adev->firmware.fw_size +=
1205                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206         }
1207
1208         if (adev->gfx.mec2_fw) {
1209                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211                 info->fw = adev->gfx.mec2_fw;
1212                 header = (const struct common_firmware_header *)info->fw->data;
1213                 adev->firmware.fw_size +=
1214                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx8: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.pfp_fw);
1223                 adev->gfx.pfp_fw = NULL;
1224                 release_firmware(adev->gfx.me_fw);
1225                 adev->gfx.me_fw = NULL;
1226                 release_firmware(adev->gfx.ce_fw);
1227                 adev->gfx.ce_fw = NULL;
1228                 release_firmware(adev->gfx.rlc_fw);
1229                 adev->gfx.rlc_fw = NULL;
1230                 release_firmware(adev->gfx.mec_fw);
1231                 adev->gfx.mec_fw = NULL;
1232                 release_firmware(adev->gfx.mec2_fw);
1233                 adev->gfx.mec2_fw = NULL;
1234         }
1235         return err;
1236 }
1237
1238 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                                     volatile u32 *buffer)
1240 {
1241         u32 count = 0, i;
1242         const struct cs_section_def *sect = NULL;
1243         const struct cs_extent_def *ext = NULL;
1244
1245         if (adev->gfx.rlc.cs_data == NULL)
1246                 return;
1247         if (buffer == NULL)
1248                 return;
1249
1250         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254         buffer[count++] = cpu_to_le32(0x80000000);
1255         buffer[count++] = cpu_to_le32(0x80000000);
1256
1257         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1259                         if (sect->id == SECT_CONTEXT) {
1260                                 buffer[count++] =
1261                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1263                                                 PACKET3_SET_CONTEXT_REG_START);
1264                                 for (i = 0; i < ext->reg_count; i++)
1265                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1266                         } else {
1267                                 return;
1268                         }
1269                 }
1270         }
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274                         PACKET3_SET_CONTEXT_REG_START);
1275         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282         buffer[count++] = cpu_to_le32(0);
1283 }
1284
1285 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 {
1287         if (adev->asic_type == CHIP_CARRIZO)
1288                 return 5;
1289         else
1290                 return 4;
1291 }
1292
1293 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 {
1295         const struct cs_section_def *cs_data;
1296         int r;
1297
1298         adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300         cs_data = adev->gfx.rlc.cs_data;
1301
1302         if (cs_data) {
1303                 /* init clear state block */
1304                 r = amdgpu_gfx_rlc_init_csb(adev);
1305                 if (r)
1306                         return r;
1307         }
1308
1309         if ((adev->asic_type == CHIP_CARRIZO) ||
1310             (adev->asic_type == CHIP_STONEY)) {
1311                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312                 r = amdgpu_gfx_rlc_init_cpt(adev);
1313                 if (r)
1314                         return r;
1315         }
1316
1317         /* init spm vmid with 0xf */
1318         if (adev->gfx.rlc.funcs->update_spm_vmid)
1319                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320
1321         return 0;
1322 }
1323
/*
 * gfx_v8_0_mec_fini - release the MEC HPD EOP buffer object
 * @adev: device owning adev->gfx.mec.hpd_eop_obj
 *
 * Hands the buffer object created by gfx_v8_0_mec_init() to
 * amdgpu_bo_free_kernel(); NULL is passed for its two remaining arguments.
 */
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331         int r;
1332         u32 *hpd;
1333         size_t mec_hpd_size;
1334
1335         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337         /* take ownership of the relevant compute queues */
1338         amdgpu_gfx_compute_queue_acquire(adev);
1339
1340         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341         if (mec_hpd_size) {
1342                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                                               AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.mec.hpd_eop_obj,
1345                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1346                                               (void **)&hpd);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 memset(hpd, 0, mec_hpd_size);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356         }
1357
1358         return 0;
1359 }
1360
/*
 * Raw shader code used by gfx_v8_0_do_edc_gpr_workarounds(): the words are
 * copied into the indirect buffer and the copy's address is programmed
 * into mmCOMPUTE_PGM_LO/HI for the VGPR dispatch.
 * NOTE(review): judging by the name this writes the VGPRs so that they
 * hold initialized data; the opcodes are not decoded here - confirm
 * against the ISA documentation.
 */
1361 static const u32 vgpr_init_compute_shader[] =
1362 {
1363         0x7e000209, 0x7e020208,
1364         0x7e040207, 0x7e060206,
1365         0x7e080205, 0x7e0a0204,
1366         0x7e0c0203, 0x7e0e0202,
1367         0x7e100201, 0x7e120200,
1368         0x7e140209, 0x7e160208,
1369         0x7e180207, 0x7e1a0206,
1370         0x7e1c0205, 0x7e1e0204,
1371         0x7e200203, 0x7e220202,
1372         0x7e240201, 0x7e260200,
1373         0x7e280209, 0x7e2a0208,
1374         0x7e2c0207, 0x7e2e0206,
1375         0x7e300205, 0x7e320204,
1376         0x7e340203, 0x7e360202,
1377         0x7e380201, 0x7e3a0200,
1378         0x7e3c0209, 0x7e3e0208,
1379         0x7e400207, 0x7e420206,
1380         0x7e440205, 0x7e460204,
1381         0x7e480203, 0x7e4a0202,
1382         0x7e4c0201, 0x7e4e0200,
1383         0x7e500209, 0x7e520208,
1384         0x7e540207, 0x7e560206,
1385         0x7e580205, 0x7e5a0204,
1386         0x7e5c0203, 0x7e5e0202,
1387         0x7e600201, 0x7e620200,
1388         0x7e640209, 0x7e660208,
1389         0x7e680207, 0x7e6a0206,
1390         0x7e6c0205, 0x7e6e0204,
1391         0x7e700203, 0x7e720202,
1392         0x7e740201, 0x7e760200,
1393         0x7e780209, 0x7e7a0208,
1394         0x7e7c0207, 0x7e7e0206,
1395         0xbf8a0000, 0xbf810000,
1396 };
1397
/*
 * Raw shader code used by gfx_v8_0_do_edc_gpr_workarounds(): the words are
 * copied into the indirect buffer and the copy's address is programmed
 * into mmCOMPUTE_PGM_LO/HI for both SGPR dispatches (the ones configured
 * by sgpr1_init_regs and sgpr2_init_regs).
 * NOTE(review): judging by the name this writes the SGPRs so that they
 * hold initialized data; the opcodes are not decoded here - confirm
 * against the ISA documentation.
 */
1398 static const u32 sgpr_init_compute_shader[] =
1399 {
1400         0xbe8a0100, 0xbe8c0102,
1401         0xbe8e0104, 0xbe900106,
1402         0xbe920108, 0xbe940100,
1403         0xbe960102, 0xbe980104,
1404         0xbe9a0106, 0xbe9c0108,
1405         0xbe9e0100, 0xbea00102,
1406         0xbea20104, 0xbea40106,
1407         0xbea60108, 0xbea80100,
1408         0xbeaa0102, 0xbeac0104,
1409         0xbeae0106, 0xbeb00108,
1410         0xbeb20100, 0xbeb40102,
1411         0xbeb60104, 0xbeb80106,
1412         0xbeba0108, 0xbebc0100,
1413         0xbebe0102, 0xbec00104,
1414         0xbec20106, 0xbec40108,
1415         0xbec60100, 0xbec80102,
1416         0xbee60004, 0xbee70005,
1417         0xbeea0006, 0xbeeb0007,
1418         0xbee80008, 0xbee90009,
1419         0xbefc0000, 0xbf8a0000,
1420         0xbf810000, 0x00000000,
1421 };
1422
/*
 * Register state for the VGPR dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds().  The array is a flat list of
 * (register, value) pairs; each pair is emitted as one SET_SH_REG packet
 * before the dispatch.
 */
1423 static const u32 vgpr_init_regs[] =
1424 {
1425         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1426         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1427         mmCOMPUTE_NUM_THREAD_X, 256*4,
1428         mmCOMPUTE_NUM_THREAD_Y, 1,
1429         mmCOMPUTE_NUM_THREAD_Z, 1,
1430         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1431         mmCOMPUTE_PGM_RSRC2, 20,
1432         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1442 };
1443
/*
 * Register state for the first SGPR dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds().  Flat list of (register, value)
 * pairs, each emitted as one SET_SH_REG packet.  Compared with
 * sgpr2_init_regs only the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value differs
 * (0x0f here, 0xf0 there).
 */
1444 static const u32 sgpr1_init_regs[] =
1445 {
1446         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1447         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1448         mmCOMPUTE_NUM_THREAD_X, 256*5,
1449         mmCOMPUTE_NUM_THREAD_Y, 1,
1450         mmCOMPUTE_NUM_THREAD_Z, 1,
1451         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1452         mmCOMPUTE_PGM_RSRC2, 20,
1453         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1454         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1455         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1456         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1457         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1458         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1459         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1460         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1461         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1462         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1463 };
1464
/*
 * Register state for the second SGPR dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds().  Flat list of (register, value)
 * pairs, each emitted as one SET_SH_REG packet.  Compared with
 * sgpr1_init_regs only the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value differs
 * (0xf0 here, 0x0f there).
 */
1465 static const u32 sgpr2_init_regs[] =
1466 {
1467         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1468         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1469         mmCOMPUTE_NUM_THREAD_X, 256*5,
1470         mmCOMPUTE_NUM_THREAD_Y, 1,
1471         mmCOMPUTE_NUM_THREAD_Z, 1,
1472         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1473         mmCOMPUTE_PGM_RSRC2, 20,
1474         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1475         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1476         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1477         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1478         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1479         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1480         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1481         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1482         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1483         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1484 };
1485
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back,
 * discarding the values, once its dispatches have completed; according to
 * the comment at that call site the reads clear the counters.
 */
1486 static const u32 sec_ded_counter_registers[] =
1487 {
1488         mmCPC_EDC_ATC_CNT,
1489         mmCPC_EDC_SCRATCH_CNT,
1490         mmCPC_EDC_UCODE_CNT,
1491         mmCPF_EDC_ATC_CNT,
1492         mmCPF_EDC_ROQ_CNT,
1493         mmCPF_EDC_TAG_CNT,
1494         mmCPG_EDC_ATC_CNT,
1495         mmCPG_EDC_DMA_CNT,
1496         mmCPG_EDC_TAG_CNT,
1497         mmDC_EDC_CSINVOC_CNT,
1498         mmDC_EDC_RESTORE_CNT,
1499         mmDC_EDC_STATE_CNT,
1500         mmGDS_EDC_CNT,
1501         mmGDS_EDC_GRBM_CNT,
1502         mmGDS_EDC_OA_DED,
1503         mmSPI_EDC_CNT,
1504         mmSQC_ATC_EDC_GATCL1_CNT,
1505         mmSQC_EDC_CNT,
1506         mmSQ_EDC_DED_CNT,
1507         mmSQ_EDC_INFO,
1508         mmSQ_EDC_SEC_CNT,
1509         mmTCC_EDC_CNT,
1510         mmTCP_ATC_EDC_GATCL1_CNT,
1511         mmTCP_EDC_CNT,
1512         mmTD_EDC_CNT
1513 };
1514
1515 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1516 {
1517         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1518         struct amdgpu_ib ib;
1519         struct dma_fence *f = NULL;
1520         int r, i;
1521         u32 tmp;
1522         unsigned total_size, vgpr_offset, sgpr_offset;
1523         u64 gpu_addr;
1524
1525         /* only supported on CZ */
1526         if (adev->asic_type != CHIP_CARRIZO)
1527                 return 0;
1528
1529         /* bail if the compute ring is not ready */
1530         if (!ring->sched.ready)
1531                 return 0;
1532
1533         tmp = RREG32(mmGB_EDC_MODE);
1534         WREG32(mmGB_EDC_MODE, 0);
1535
1536         total_size =
1537                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1538         total_size +=
1539                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1540         total_size +=
1541                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1542         total_size = ALIGN(total_size, 256);
1543         vgpr_offset = total_size;
1544         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1545         sgpr_offset = total_size;
1546         total_size += sizeof(sgpr_init_compute_shader);
1547
1548         /* allocate an indirect buffer to put the commands in */
1549         memset(&ib, 0, sizeof(ib));
1550         r = amdgpu_ib_get(adev, NULL, total_size,
1551                                         AMDGPU_IB_POOL_DIRECT, &ib);
1552         if (r) {
1553                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1554                 return r;
1555         }
1556
1557         /* load the compute shaders */
1558         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1559                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1560
1561         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1562                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1563
1564         /* init the ib length to 0 */
1565         ib.length_dw = 0;
1566
1567         /* VGPR */
1568         /* write the register state for the compute dispatch */
1569         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1570                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1572                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1573         }
1574         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1576         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580
1581         /* write dispatch packet */
1582         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583         ib.ptr[ib.length_dw++] = 8; /* x */
1584         ib.ptr[ib.length_dw++] = 1; /* y */
1585         ib.ptr[ib.length_dw++] = 1; /* z */
1586         ib.ptr[ib.length_dw++] =
1587                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588
1589         /* write CS partial flush packet */
1590         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592
1593         /* SGPR1 */
1594         /* write the register state for the compute dispatch */
1595         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1596                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1598                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1599         }
1600         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606
1607         /* write dispatch packet */
1608         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609         ib.ptr[ib.length_dw++] = 8; /* x */
1610         ib.ptr[ib.length_dw++] = 1; /* y */
1611         ib.ptr[ib.length_dw++] = 1; /* z */
1612         ib.ptr[ib.length_dw++] =
1613                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614
1615         /* write CS partial flush packet */
1616         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618
1619         /* SGPR2 */
1620         /* write the register state for the compute dispatch */
1621         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1622                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1623                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1624                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1625         }
1626         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1627         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1628         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1629         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1630         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1631         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1632
1633         /* write dispatch packet */
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1635         ib.ptr[ib.length_dw++] = 8; /* x */
1636         ib.ptr[ib.length_dw++] = 1; /* y */
1637         ib.ptr[ib.length_dw++] = 1; /* z */
1638         ib.ptr[ib.length_dw++] =
1639                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1640
1641         /* write CS partial flush packet */
1642         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1643         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1644
1645         /* shedule the ib on the ring */
1646         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1647         if (r) {
1648                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1649                 goto fail;
1650         }
1651
1652         /* wait for the GPU to finish processing the IB */
1653         r = dma_fence_wait(f, false);
1654         if (r) {
1655                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1656                 goto fail;
1657         }
1658
1659         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1660         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1661         WREG32(mmGB_EDC_MODE, tmp);
1662
1663         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1664         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1665         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1666
1667
1668         /* read back registers to clear the counters */
1669         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1670                 RREG32(sec_ded_counter_registers[i]);
1671
1672 fail:
1673         amdgpu_ib_free(adev, &ib, NULL);
1674         dma_fence_put(f);
1675
1676         return r;
1677 }
1678
1679 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1680 {
1681         u32 gb_addr_config;
1682         u32 mc_arb_ramcfg;
1683         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1684         u32 tmp;
1685         int ret;
1686
1687         switch (adev->asic_type) {
1688         case CHIP_TOPAZ:
1689                 adev->gfx.config.max_shader_engines = 1;
1690                 adev->gfx.config.max_tile_pipes = 2;
1691                 adev->gfx.config.max_cu_per_sh = 6;
1692                 adev->gfx.config.max_sh_per_se = 1;
1693                 adev->gfx.config.max_backends_per_se = 2;
1694                 adev->gfx.config.max_texture_channel_caches = 2;
1695                 adev->gfx.config.max_gprs = 256;
1696                 adev->gfx.config.max_gs_threads = 32;
1697                 adev->gfx.config.max_hw_contexts = 8;
1698
1699                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1700                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1701                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1702                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1703                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1704                 break;
1705         case CHIP_FIJI:
1706                 adev->gfx.config.max_shader_engines = 4;
1707                 adev->gfx.config.max_tile_pipes = 16;
1708                 adev->gfx.config.max_cu_per_sh = 16;
1709                 adev->gfx.config.max_sh_per_se = 1;
1710                 adev->gfx.config.max_backends_per_se = 4;
1711                 adev->gfx.config.max_texture_channel_caches = 16;
1712                 adev->gfx.config.max_gprs = 256;
1713                 adev->gfx.config.max_gs_threads = 32;
1714                 adev->gfx.config.max_hw_contexts = 8;
1715
1716                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721                 break;
1722         case CHIP_POLARIS11:
1723         case CHIP_POLARIS12:
1724                 ret = amdgpu_atombios_get_gfx_info(adev);
1725                 if (ret)
1726                         return ret;
1727                 adev->gfx.config.max_gprs = 256;
1728                 adev->gfx.config.max_gs_threads = 32;
1729                 adev->gfx.config.max_hw_contexts = 8;
1730
1731                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1736                 break;
1737         case CHIP_POLARIS10:
1738         case CHIP_VEGAM:
1739                 ret = amdgpu_atombios_get_gfx_info(adev);
1740                 if (ret)
1741                         return ret;
1742                 adev->gfx.config.max_gprs = 256;
1743                 adev->gfx.config.max_gs_threads = 32;
1744                 adev->gfx.config.max_hw_contexts = 8;
1745
1746                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751                 break;
1752         case CHIP_TONGA:
1753                 adev->gfx.config.max_shader_engines = 4;
1754                 adev->gfx.config.max_tile_pipes = 8;
1755                 adev->gfx.config.max_cu_per_sh = 8;
1756                 adev->gfx.config.max_sh_per_se = 1;
1757                 adev->gfx.config.max_backends_per_se = 2;
1758                 adev->gfx.config.max_texture_channel_caches = 8;
1759                 adev->gfx.config.max_gprs = 256;
1760                 adev->gfx.config.max_gs_threads = 32;
1761                 adev->gfx.config.max_hw_contexts = 8;
1762
1763                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1768                 break;
1769         case CHIP_CARRIZO:
1770                 adev->gfx.config.max_shader_engines = 1;
1771                 adev->gfx.config.max_tile_pipes = 2;
1772                 adev->gfx.config.max_sh_per_se = 1;
1773                 adev->gfx.config.max_backends_per_se = 2;
1774                 adev->gfx.config.max_cu_per_sh = 8;
1775                 adev->gfx.config.max_texture_channel_caches = 2;
1776                 adev->gfx.config.max_gprs = 256;
1777                 adev->gfx.config.max_gs_threads = 32;
1778                 adev->gfx.config.max_hw_contexts = 8;
1779
1780                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1785                 break;
1786         case CHIP_STONEY:
1787                 adev->gfx.config.max_shader_engines = 1;
1788                 adev->gfx.config.max_tile_pipes = 2;
1789                 adev->gfx.config.max_sh_per_se = 1;
1790                 adev->gfx.config.max_backends_per_se = 1;
1791                 adev->gfx.config.max_cu_per_sh = 3;
1792                 adev->gfx.config.max_texture_channel_caches = 2;
1793                 adev->gfx.config.max_gprs = 256;
1794                 adev->gfx.config.max_gs_threads = 16;
1795                 adev->gfx.config.max_hw_contexts = 8;
1796
1797                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1802                 break;
1803         default:
1804                 adev->gfx.config.max_shader_engines = 2;
1805                 adev->gfx.config.max_tile_pipes = 4;
1806                 adev->gfx.config.max_cu_per_sh = 2;
1807                 adev->gfx.config.max_sh_per_se = 1;
1808                 adev->gfx.config.max_backends_per_se = 2;
1809                 adev->gfx.config.max_texture_channel_caches = 4;
1810                 adev->gfx.config.max_gprs = 256;
1811                 adev->gfx.config.max_gs_threads = 32;
1812                 adev->gfx.config.max_hw_contexts = 8;
1813
1814                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1819                 break;
1820         }
1821
1822         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1823         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1824
1825         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1826                                 MC_ARB_RAMCFG, NOOFBANK);
1827         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1828                                 MC_ARB_RAMCFG, NOOFRANKS);
1829
1830         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1831         adev->gfx.config.mem_max_burst_length_bytes = 256;
1832         if (adev->flags & AMD_IS_APU) {
1833                 /* Get memory bank mapping mode. */
1834                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1835                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1836                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1837
1838                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1839                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841
1842                 /* Validate settings in case only one DIMM installed. */
1843                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1844                         dimm00_addr_map = 0;
1845                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1846                         dimm01_addr_map = 0;
1847                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1848                         dimm10_addr_map = 0;
1849                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1850                         dimm11_addr_map = 0;
1851
1852                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1853                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1854                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1855                         adev->gfx.config.mem_row_size_in_kb = 2;
1856                 else
1857                         adev->gfx.config.mem_row_size_in_kb = 1;
1858         } else {
1859                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1860                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1861                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1862                         adev->gfx.config.mem_row_size_in_kb = 4;
1863         }
1864
1865         adev->gfx.config.shader_engine_tile_size = 32;
1866         adev->gfx.config.num_gpus = 1;
1867         adev->gfx.config.multi_gpu_tile_size = 64;
1868
1869         /* fix up row size */
1870         switch (adev->gfx.config.mem_row_size_in_kb) {
1871         case 1:
1872         default:
1873                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1874                 break;
1875         case 2:
1876                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1877                 break;
1878         case 4:
1879                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1880                 break;
1881         }
1882         adev->gfx.config.gb_addr_config = gb_addr_config;
1883
1884         return 0;
1885 }
1886
1887 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1888                                         int mec, int pipe, int queue)
1889 {
1890         int r;
1891         unsigned irq_type;
1892         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1893         unsigned int hw_prio;
1894
1895         ring = &adev->gfx.compute_ring[ring_id];
1896
1897         /* mec0 is me1 */
1898         ring->me = mec + 1;
1899         ring->pipe = pipe;
1900         ring->queue = queue;
1901
1902         ring->ring_obj = NULL;
1903         ring->use_doorbell = true;
1904         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1907         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908
1909         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911                 + ring->pipe;
1912
1913         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1914                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1915         /* type-2 packets are deprecated on MEC, use type-3 instead */
1916         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1917                              hw_prio, NULL);
1918         if (r)
1919                 return r;
1920
1921
1922         return 0;
1923 }
1924
1925 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1926
1927 static int gfx_v8_0_sw_init(void *handle)
1928 {
1929         int i, j, k, r, ring_id;
1930         struct amdgpu_ring *ring;
1931         struct amdgpu_kiq *kiq;
1932         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1933
1934         switch (adev->asic_type) {
1935         case CHIP_TONGA:
1936         case CHIP_CARRIZO:
1937         case CHIP_FIJI:
1938         case CHIP_POLARIS10:
1939         case CHIP_POLARIS11:
1940         case CHIP_POLARIS12:
1941         case CHIP_VEGAM:
1942                 adev->gfx.mec.num_mec = 2;
1943                 break;
1944         case CHIP_TOPAZ:
1945         case CHIP_STONEY:
1946         default:
1947                 adev->gfx.mec.num_mec = 1;
1948                 break;
1949         }
1950
1951         adev->gfx.mec.num_pipe_per_mec = 4;
1952         adev->gfx.mec.num_queue_per_pipe = 8;
1953
1954         /* EOP Event */
1955         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1956         if (r)
1957                 return r;
1958
1959         /* Privileged reg */
1960         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1961                               &adev->gfx.priv_reg_irq);
1962         if (r)
1963                 return r;
1964
1965         /* Privileged inst */
1966         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1967                               &adev->gfx.priv_inst_irq);
1968         if (r)
1969                 return r;
1970
1971         /* Add CP EDC/ECC irq  */
1972         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1973                               &adev->gfx.cp_ecc_error_irq);
1974         if (r)
1975                 return r;
1976
1977         /* SQ interrupts. */
1978         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1979                               &adev->gfx.sq_irq);
1980         if (r) {
1981                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1982                 return r;
1983         }
1984
1985         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1986
1987         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1988
1989         r = gfx_v8_0_init_microcode(adev);
1990         if (r) {
1991                 DRM_ERROR("Failed to load gfx firmware!\n");
1992                 return r;
1993         }
1994
1995         r = adev->gfx.rlc.funcs->init(adev);
1996         if (r) {
1997                 DRM_ERROR("Failed to init rlc BOs!\n");
1998                 return r;
1999         }
2000
2001         r = gfx_v8_0_mec_init(adev);
2002         if (r) {
2003                 DRM_ERROR("Failed to init MEC BOs!\n");
2004                 return r;
2005         }
2006
2007         /* set up the gfx ring */
2008         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2009                 ring = &adev->gfx.gfx_ring[i];
2010                 ring->ring_obj = NULL;
2011                 sprintf(ring->name, "gfx");
2012                 /* no gfx doorbells on iceland */
2013                 if (adev->asic_type != CHIP_TOPAZ) {
2014                         ring->use_doorbell = true;
2015                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2016                 }
2017
2018                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2019                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2020                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2021                 if (r)
2022                         return r;
2023         }
2024
2025
2026         /* set up the compute queues - allocate horizontally across pipes */
2027         ring_id = 0;
2028         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032                                         continue;
2033
2034                                 r = gfx_v8_0_compute_ring_init(adev,
2035                                                                 ring_id,
2036                                                                 i, k, j);
2037                                 if (r)
2038                                         return r;
2039
2040                                 ring_id++;
2041                         }
2042                 }
2043         }
2044
2045         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046         if (r) {
2047                 DRM_ERROR("Failed to init KIQ BOs!\n");
2048                 return r;
2049         }
2050
2051         kiq = &adev->gfx.kiq;
2052         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2053         if (r)
2054                 return r;
2055
2056         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2057         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2058         if (r)
2059                 return r;
2060
2061         adev->gfx.ce_ram_size = 0x8000;
2062
2063         r = gfx_v8_0_gpu_early_init(adev);
2064         if (r)
2065                 return r;
2066
2067         return 0;
2068 }
2069
2070 static int gfx_v8_0_sw_fini(void *handle)
2071 {
2072         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073         int i;
2074
2075         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079
2080         amdgpu_gfx_mqd_sw_fini(adev);
2081         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082         amdgpu_gfx_kiq_fini(adev);
2083
2084         gfx_v8_0_mec_fini(adev);
2085         amdgpu_gfx_rlc_fini(adev);
2086         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087                                 &adev->gfx.rlc.clear_state_gpu_addr,
2088                                 (void **)&adev->gfx.rlc.cs_ptr);
2089         if ((adev->asic_type == CHIP_CARRIZO) ||
2090             (adev->asic_type == CHIP_STONEY)) {
2091                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092                                 &adev->gfx.rlc.cp_table_gpu_addr,
2093                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2094         }
2095         gfx_v8_0_free_microcode(adev);
2096
2097         return 0;
2098 }
2099
2100 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101 {
2102         uint32_t *modearray, *mod2array;
2103         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105         u32 reg_offset;
2106
2107         modearray = adev->gfx.config.tile_mode_array;
2108         mod2array = adev->gfx.config.macrotile_mode_array;
2109
2110         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111                 modearray[reg_offset] = 0;
2112
2113         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2114                 mod2array[reg_offset] = 0;
2115
2116         switch (adev->asic_type) {
2117         case CHIP_TOPAZ:
2118                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147                                 PIPE_CONFIG(ADDR_SURF_P2));
2148                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220
2221                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                 NUM_BANKS(ADDR_SURF_8_BANK));
2225                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228                                 NUM_BANKS(ADDR_SURF_8_BANK));
2229                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                 NUM_BANKS(ADDR_SURF_8_BANK));
2233                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                                 NUM_BANKS(ADDR_SURF_8_BANK));
2237                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                                 NUM_BANKS(ADDR_SURF_8_BANK));
2245                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                                 NUM_BANKS(ADDR_SURF_8_BANK));
2249                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                 NUM_BANKS(ADDR_SURF_16_BANK));
2253                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                 NUM_BANKS(ADDR_SURF_16_BANK));
2257                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                  NUM_BANKS(ADDR_SURF_16_BANK));
2261                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                  NUM_BANKS(ADDR_SURF_16_BANK));
2269                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                                  NUM_BANKS(ADDR_SURF_16_BANK));
2273                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                  NUM_BANKS(ADDR_SURF_8_BANK));
2277
2278                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280                             reg_offset != 23)
2281                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282
2283                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284                         if (reg_offset != 7)
2285                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286
2287                 break;
2288         case CHIP_FIJI:
2289         case CHIP_VEGAM:
2290                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412
2413                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                  NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                  NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                  NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                  NUM_BANKS(ADDR_SURF_4_BANK));
2469
2470                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472
2473                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474                         if (reg_offset != 7)
2475                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476
2477                 break;
2478         case CHIP_TONGA:
2479                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601
2602                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641                                  NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                                  NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                  NUM_BANKS(ADDR_SURF_8_BANK));
2650                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                                  NUM_BANKS(ADDR_SURF_4_BANK));
2654                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                                  NUM_BANKS(ADDR_SURF_4_BANK));
2658
2659                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661
2662                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663                         if (reg_offset != 7)
2664                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665
2666                 break;
2667         case CHIP_POLARIS11:
2668         case CHIP_POLARIS12:
2669                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791
2792                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795                                 NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800                                 NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805                                 NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856
2857                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860                                 NUM_BANKS(ADDR_SURF_4_BANK));
2861
2862                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864
2865                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866                         if (reg_offset != 7)
2867                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868
2869                 break;
2870         case CHIP_POLARIS10:
2871                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993
2994                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                                 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_8_BANK));
3053
3054                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_4_BANK));
3058
3059                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062                                 NUM_BANKS(ADDR_SURF_4_BANK));
3063
3064                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066
3067                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068                         if (reg_offset != 7)
3069                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070
3071                 break;
3072         case CHIP_STONEY:
3073                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2));
3103                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175
3176                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                 NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                 NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                  NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                  NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                  NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                  NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                  NUM_BANKS(ADDR_SURF_8_BANK));
3232
3233                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235                             reg_offset != 23)
3236                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237
3238                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239                         if (reg_offset != 7)
3240                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241
3242                 break;
3243         default:
3244                 dev_warn(adev->dev,
3245                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3246                          adev->asic_type);
3247                 fallthrough;
3248
3249         case CHIP_CARRIZO:
3250                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263                                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2) |
3272                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2));
3280                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352
3353                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356                                 NUM_BANKS(ADDR_SURF_8_BANK));
3357                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360                                 NUM_BANKS(ADDR_SURF_8_BANK));
3361                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                 NUM_BANKS(ADDR_SURF_8_BANK));
3365                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                 NUM_BANKS(ADDR_SURF_8_BANK));
3369                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376                                 NUM_BANKS(ADDR_SURF_8_BANK));
3377                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                                 NUM_BANKS(ADDR_SURF_8_BANK));
3381                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_16_BANK));
3385                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_16_BANK));
3389                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                  NUM_BANKS(ADDR_SURF_16_BANK));
3393                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                  NUM_BANKS(ADDR_SURF_16_BANK));
3397                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                                  NUM_BANKS(ADDR_SURF_16_BANK));
3401                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                                  NUM_BANKS(ADDR_SURF_16_BANK));
3405                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                  NUM_BANKS(ADDR_SURF_8_BANK));
3409
3410                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412                             reg_offset != 23)
3413                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414
3415                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416                         if (reg_offset != 7)
3417                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418
3419                 break;
3420         }
3421 }
3422
3423 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424                                   u32 se_num, u32 sh_num, u32 instance)
3425 {
3426         u32 data;
3427
3428         if (instance == 0xffffffff)
3429                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430         else
3431                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432
3433         if (se_num == 0xffffffff)
3434                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435         else
3436                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437
3438         if (sh_num == 0xffffffff)
3439                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440         else
3441                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442
3443         WREG32(mmGRBM_GFX_INDEX, data);
3444 }
3445
3446 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447                                   u32 me, u32 pipe, u32 q, u32 vm)
3448 {
3449         vi_srbm_select(adev, me, pipe, q, vm);
3450 }
3451
3452 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453 {
3454         u32 data, mask;
3455
3456         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3457                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458
3459         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460
3461         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462                                          adev->gfx.config.max_sh_per_se);
3463
3464         return (~data) & mask;
3465 }
3466
3467 static void
3468 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469 {
3470         switch (adev->asic_type) {
3471         case CHIP_FIJI:
3472         case CHIP_VEGAM:
3473                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474                           RB_XSEL2(1) | PKR_MAP(2) |
3475                           PKR_XSEL(1) | PKR_YSEL(1) |
3476                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478                            SE_PAIR_YSEL(2);
3479                 break;
3480         case CHIP_TONGA:
3481         case CHIP_POLARIS10:
3482                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483                           SE_XSEL(1) | SE_YSEL(1);
3484                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485                            SE_PAIR_YSEL(2);
3486                 break;
3487         case CHIP_TOPAZ:
3488         case CHIP_CARRIZO:
3489                 *rconf |= RB_MAP_PKR0(2);
3490                 *rconf1 |= 0x0;
3491                 break;
3492         case CHIP_POLARIS11:
3493         case CHIP_POLARIS12:
3494                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495                           SE_XSEL(1) | SE_YSEL(1);
3496                 *rconf1 |= 0x0;
3497                 break;
3498         case CHIP_STONEY:
3499                 *rconf |= 0x0;
3500                 *rconf1 |= 0x0;
3501                 break;
3502         default:
3503                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504                 break;
3505         }
3506 }
3507
3508 static void
3509 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510                                         u32 raster_config, u32 raster_config_1,
3511                                         unsigned rb_mask, unsigned num_rb)
3512 {
3513         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516         unsigned rb_per_se = num_rb / num_se;
3517         unsigned se_mask[4];
3518         unsigned se;
3519
3520         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524
3525         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528
3529         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530                              (!se_mask[2] && !se_mask[3]))) {
3531                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532
3533                 if (!se_mask[0] && !se_mask[1]) {
3534                         raster_config_1 |=
3535                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536                 } else {
3537                         raster_config_1 |=
3538                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539                 }
3540         }
3541
3542         for (se = 0; se < num_se; se++) {
3543                 unsigned raster_config_se = raster_config;
3544                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546                 int idx = (se / 2) * 2;
3547
3548                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549                         raster_config_se &= ~SE_MAP_MASK;
3550
3551                         if (!se_mask[idx]) {
3552                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553                         } else {
3554                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555                         }
3556                 }
3557
3558                 pkr0_mask &= rb_mask;
3559                 pkr1_mask &= rb_mask;
3560                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561                         raster_config_se &= ~PKR_MAP_MASK;
3562
3563                         if (!pkr0_mask) {
3564                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565                         } else {
3566                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567                         }
3568                 }
3569
3570                 if (rb_per_se >= 2) {
3571                         unsigned rb0_mask = 1 << (se * rb_per_se);
3572                         unsigned rb1_mask = rb0_mask << 1;
3573
3574                         rb0_mask &= rb_mask;
3575                         rb1_mask &= rb_mask;
3576                         if (!rb0_mask || !rb1_mask) {
3577                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3578
3579                                 if (!rb0_mask) {
3580                                         raster_config_se |=
3581                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582                                 } else {
3583                                         raster_config_se |=
3584                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585                                 }
3586                         }
3587
3588                         if (rb_per_se > 2) {
3589                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590                                 rb1_mask = rb0_mask << 1;
3591                                 rb0_mask &= rb_mask;
3592                                 rb1_mask &= rb_mask;
3593                                 if (!rb0_mask || !rb1_mask) {
3594                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3595
3596                                         if (!rb0_mask) {
3597                                                 raster_config_se |=
3598                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599                                         } else {
3600                                                 raster_config_se |=
3601                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602                                         }
3603                                 }
3604                         }
3605                 }
3606
3607                 /* GRBM_GFX_INDEX has a different offset on VI */
3608                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611         }
3612
3613         /* GRBM_GFX_INDEX has a different offset on VI */
3614         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615 }
3616
3617 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618 {
3619         int i, j;
3620         u32 data;
3621         u32 raster_config = 0, raster_config_1 = 0;
3622         u32 active_rbs = 0;
3623         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624                                         adev->gfx.config.max_sh_per_se;
3625         unsigned num_rb_pipes;
3626
3627         mutex_lock(&adev->grbm_idx_mutex);
3628         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3632                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633                                                rb_bitmap_width_per_sh);
3634                 }
3635         }
3636         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637
3638         adev->gfx.config.backend_enable_mask = active_rbs;
3639         adev->gfx.config.num_rbs = hweight32(active_rbs);
3640
3641         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642                              adev->gfx.config.max_shader_engines, 16);
3643
3644         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645
3646         if (!adev->gfx.config.backend_enable_mask ||
3647                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3648                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650         } else {
3651                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652                                                         adev->gfx.config.backend_enable_mask,
3653                                                         num_rb_pipes);
3654         }
3655
3656         /* cache the values for userspace */
3657         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3662                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664                         adev->gfx.config.rb_config[i][j].raster_config =
3665                                 RREG32(mmPA_SC_RASTER_CONFIG);
3666                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3667                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3668                 }
3669         }
3670         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671         mutex_unlock(&adev->grbm_idx_mutex);
3672 }
3673
3674 #define DEFAULT_SH_MEM_BASES    (0x6000)
3675 /**
3676  * gfx_v8_0_init_compute_vmid - gart enable
3677  *
3678  * @adev: amdgpu_device pointer
3679  *
3680  * Initialize compute vmid sh_mem registers
3681  *
3682  */
3683 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3684 {
3685         int i;
3686         uint32_t sh_mem_config;
3687         uint32_t sh_mem_bases;
3688
3689         /*
3690          * Configure apertures:
3691          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3692          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3693          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3694          */
3695         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3696
3697         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3698                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3699                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3700                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3701                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3702                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3703
3704         mutex_lock(&adev->srbm_mutex);
3705         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3706                 vi_srbm_select(adev, 0, 0, 0, i);
3707                 /* CP and shaders */
3708                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3709                 WREG32(mmSH_MEM_APE1_BASE, 1);
3710                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3711                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3712         }
3713         vi_srbm_select(adev, 0, 0, 0, 0);
3714         mutex_unlock(&adev->srbm_mutex);
3715
3716         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3717            access. These should be enabled by FW for target VMIDs. */
3718         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3719                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3720                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3721                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3722                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3723         }
3724 }
3725
3726 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3727 {
3728         int vmid;
3729
3730         /*
3731          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3732          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3733          * the driver can enable them for graphics. VMID0 should maintain
3734          * access so that HWS firmware can save/restore entries.
3735          */
3736         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3737                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3738                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3739                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3740                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3741         }
3742 }
3743
3744 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745 {
3746         switch (adev->asic_type) {
3747         default:
3748                 adev->gfx.config.double_offchip_lds_buf = 1;
3749                 break;
3750         case CHIP_CARRIZO:
3751         case CHIP_STONEY:
3752                 adev->gfx.config.double_offchip_lds_buf = 0;
3753                 break;
3754         }
3755 }
3756
3757 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3758 {
3759         u32 tmp, sh_static_mem_cfg;
3760         int i;
3761
3762         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766
3767         gfx_v8_0_tiling_mode_table_init(adev);
3768         gfx_v8_0_setup_rb(adev);
3769         gfx_v8_0_get_cu_info(adev);
3770         gfx_v8_0_config_init(adev);
3771
3772         /* XXX SH_MEM regs */
3773         /* where to put LDS, scratch, GPUVM in FSA64 space */
3774         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775                                    SWIZZLE_ENABLE, 1);
3776         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777                                    ELEMENT_SIZE, 1);
3778         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779                                    INDEX_STRIDE, 3);
3780         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781
3782         mutex_lock(&adev->srbm_mutex);
3783         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784                 vi_srbm_select(adev, 0, 0, 0, i);
3785                 /* CP and shaders */
3786                 if (i == 0) {
3787                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791                         WREG32(mmSH_MEM_CONFIG, tmp);
3792                         WREG32(mmSH_MEM_BASES, 0);
3793                 } else {
3794                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798                         WREG32(mmSH_MEM_CONFIG, tmp);
3799                         tmp = adev->gmc.shared_aperture_start >> 48;
3800                         WREG32(mmSH_MEM_BASES, tmp);
3801                 }
3802
3803                 WREG32(mmSH_MEM_APE1_BASE, 1);
3804                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3805         }
3806         vi_srbm_select(adev, 0, 0, 0, 0);
3807         mutex_unlock(&adev->srbm_mutex);
3808
3809         gfx_v8_0_init_compute_vmid(adev);
3810         gfx_v8_0_init_gds_vmid(adev);
3811
3812         mutex_lock(&adev->grbm_idx_mutex);
3813         /*
3814          * making sure that the following register writes will be broadcasted
3815          * to all the shaders
3816          */
3817         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818
3819         WREG32(mmPA_SC_FIFO_SIZE,
3820                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3823                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828
3829         tmp = RREG32(mmSPI_ARB_PRIORITY);
3830         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834         WREG32(mmSPI_ARB_PRIORITY, tmp);
3835
3836         mutex_unlock(&adev->grbm_idx_mutex);
3837
3838 }
3839
3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841 {
3842         u32 i, j, k;
3843         u32 mask;
3844
3845         mutex_lock(&adev->grbm_idx_mutex);
3846         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849                         for (k = 0; k < adev->usec_timeout; k++) {
3850                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851                                         break;
3852                                 udelay(1);
3853                         }
3854                         if (k == adev->usec_timeout) {
3855                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856                                                       0xffffffff, 0xffffffff);
3857                                 mutex_unlock(&adev->grbm_idx_mutex);
3858                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3859                                          i, j);
3860                                 return;
3861                         }
3862                 }
3863         }
3864         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865         mutex_unlock(&adev->grbm_idx_mutex);
3866
3867         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871         for (k = 0; k < adev->usec_timeout; k++) {
3872                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873                         break;
3874                 udelay(1);
3875         }
3876 }
3877
3878 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879                                                bool enable)
3880 {
3881         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882
3883         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887
3888         WREG32(mmCP_INT_CNTL_RING0, tmp);
3889 }
3890
3891 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892 {
3893         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3894         /* csib */
3895         WREG32(mmRLC_CSIB_ADDR_HI,
3896                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3897         WREG32(mmRLC_CSIB_ADDR_LO,
3898                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3899         WREG32(mmRLC_CSIB_LENGTH,
3900                         adev->gfx.rlc.clear_state_size);
3901 }
3902
/*
 * Parse the indirect part of a register list format buffer in place.
 *
 * The buffer is walked from @ind_offset to @list_size (both in dwords).
 * A dword equal to 0xFFFFFFFF terminates the current sub-list; the offset
 * of the first dword of every sub-list is appended to @ind_start_offsets.
 * Inside a sub-list records are three dwords long; the third dword of each
 * record is looked up in (or appended to) @unique_indices and then
 * overwritten with its position in that table.
 *
 * @register_list_format: buffer to parse; modified in place
 * @ind_offset:           dword offset at which parsing starts
 * @list_size:            number of dwords in @register_list_format
 * @unique_indices:       table of distinct third-dword values (in/out)
 * @indices_count:        number of used entries in @unique_indices (in/out)
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    table of sub-list start offsets (in/out)
 * @offset_count:         number of used entries in @ind_start_offsets (in/out)
 * @max_offset:           capacity of @ind_start_offsets
 *
 * BUG()s when either table becomes full.  A record that is cut off by the
 * end of the buffer triggers a WARN and stops parsing instead of reading
 * and writing past @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third dword of the record */
		ind_offset += 2;

		/*
		 * A truncated record would otherwise be accessed beyond the
		 * end of the buffer below.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3952
3953 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3954 {
3955         int i, temp, data;
3956         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3957         int indices_count = 0;
3958         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3959         int offset_count = 0;
3960
3961         int list_size;
3962         unsigned int *register_list_format =
3963                 kmemdup(adev->gfx.rlc.register_list_format,
3964                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3965         if (!register_list_format)
3966                 return -ENOMEM;
3967
3968         gfx_v8_0_parse_ind_reg_list(register_list_format,
3969                                 RLC_FormatDirectRegListLength,
3970                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971                                 unique_indices,
3972                                 &indices_count,
3973                                 ARRAY_SIZE(unique_indices),
3974                                 indirect_start_offsets,
3975                                 &offset_count,
3976                                 ARRAY_SIZE(indirect_start_offsets));
3977
3978         /* save and restore list */
3979         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980
3981         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984
3985         /* indirect list */
3986         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989
3990         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991         list_size = list_size >> 1;
3992         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994
3995         /* starting offsets starts */
3996         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997                 adev->gfx.rlc.starting_offsets_start);
3998         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4000                                 indirect_start_offsets[i]);
4001
4002         /* unique indices */
4003         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006                 if (unique_indices[i] != 0) {
4007                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008                         WREG32(data + i, unique_indices[i] >> 20);
4009                 }
4010         }
4011         kfree(register_list_format);
4012
4013         return 0;
4014 }
4015
4016 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017 {
4018         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019 }
4020
4021 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022 {
4023         uint32_t data;
4024
4025         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026
4027         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031         WREG32(mmRLC_PG_DELAY, data);
4032
4033         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035
4036 }
4037
4038 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039                                                 bool enable)
4040 {
4041         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042 }
4043
4044 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045                                                   bool enable)
4046 {
4047         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048 }
4049
4050 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051 {
4052         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053 }
4054
4055 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056 {
4057         if ((adev->asic_type == CHIP_CARRIZO) ||
4058             (adev->asic_type == CHIP_STONEY)) {
4059                 gfx_v8_0_init_csb(adev);
4060                 gfx_v8_0_init_save_restore_list(adev);
4061                 gfx_v8_0_enable_save_restore_machine(adev);
4062                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063                 gfx_v8_0_init_power_gating(adev);
4064                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4065         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4066                    (adev->asic_type == CHIP_POLARIS12) ||
4067                    (adev->asic_type == CHIP_VEGAM)) {
4068                 gfx_v8_0_init_csb(adev);
4069                 gfx_v8_0_init_save_restore_list(adev);
4070                 gfx_v8_0_enable_save_restore_machine(adev);
4071                 gfx_v8_0_init_power_gating(adev);
4072         }
4073
4074 }
4075
4076 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4077 {
4078         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4079
4080         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4081         gfx_v8_0_wait_for_rlc_serdes(adev);
4082 }
4083
4084 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4085 {
4086         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4087         udelay(50);
4088
4089         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4090         udelay(50);
4091 }
4092
4093 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4094 {
4095         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4096
4097         /* carrizo do enable cp interrupt after cp inited */
4098         if (!(adev->flags & AMD_IS_APU))
4099                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4100
4101         udelay(50);
4102 }
4103
4104 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4105 {
4106         if (amdgpu_sriov_vf(adev)) {
4107                 gfx_v8_0_init_csb(adev);
4108                 return 0;
4109         }
4110
4111         adev->gfx.rlc.funcs->stop(adev);
4112         adev->gfx.rlc.funcs->reset(adev);
4113         gfx_v8_0_init_pg(adev);
4114         adev->gfx.rlc.funcs->start(adev);
4115
4116         return 0;
4117 }
4118
4119 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4120 {
4121         u32 tmp = RREG32(mmCP_ME_CNTL);
4122
4123         if (enable) {
4124                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4125                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4126                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4127         } else {
4128                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4129                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4130                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4131         }
4132         WREG32(mmCP_ME_CNTL, tmp);
4133         udelay(50);
4134 }
4135
4136 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4137 {
4138         u32 count = 0;
4139         const struct cs_section_def *sect = NULL;
4140         const struct cs_extent_def *ext = NULL;
4141
4142         /* begin clear state */
4143         count += 2;
4144         /* context control state */
4145         count += 3;
4146
4147         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4148                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4149                         if (sect->id == SECT_CONTEXT)
4150                                 count += 2 + ext->reg_count;
4151                         else
4152                                 return 0;
4153                 }
4154         }
4155         /* pa_sc_raster_config/pa_sc_raster_config1 */
4156         count += 4;
4157         /* end clear state */
4158         count += 2;
4159         /* clear state */
4160         count += 2;
4161
4162         return count;
4163 }
4164
4165 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4166 {
4167         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4168         const struct cs_section_def *sect = NULL;
4169         const struct cs_extent_def *ext = NULL;
4170         int r, i;
4171
4172         /* init the CP */
4173         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4174         WREG32(mmCP_ENDIAN_SWAP, 0);
4175         WREG32(mmCP_DEVICE_ID, 1);
4176
4177         gfx_v8_0_cp_gfx_enable(adev, true);
4178
4179         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4180         if (r) {
4181                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4182                 return r;
4183         }
4184
4185         /* clear state buffer */
4186         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4188
4189         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4190         amdgpu_ring_write(ring, 0x80000000);
4191         amdgpu_ring_write(ring, 0x80000000);
4192
4193         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4194                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4195                         if (sect->id == SECT_CONTEXT) {
4196                                 amdgpu_ring_write(ring,
4197                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4198                                                ext->reg_count));
4199                                 amdgpu_ring_write(ring,
4200                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4201                                 for (i = 0; i < ext->reg_count; i++)
4202                                         amdgpu_ring_write(ring, ext->extent[i]);
4203                         }
4204                 }
4205         }
4206
4207         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4208         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4209         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4210         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4211
4212         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4213         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4214
4215         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4216         amdgpu_ring_write(ring, 0);
4217
4218         /* init the CE partitions */
4219         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4220         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4221         amdgpu_ring_write(ring, 0x8000);
4222         amdgpu_ring_write(ring, 0x8000);
4223
4224         amdgpu_ring_commit(ring);
4225
4226         return 0;
4227 }
4228 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4229 {
4230         u32 tmp;
4231         /* no gfx doorbells on iceland */
4232         if (adev->asic_type == CHIP_TOPAZ)
4233                 return;
4234
4235         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4236
4237         if (ring->use_doorbell) {
4238                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239                                 DOORBELL_OFFSET, ring->doorbell_index);
4240                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4241                                                 DOORBELL_HIT, 0);
4242                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4243                                             DOORBELL_EN, 1);
4244         } else {
4245                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4246         }
4247
4248         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4249
4250         if (adev->flags & AMD_IS_APU)
4251                 return;
4252
4253         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4254                                         DOORBELL_RANGE_LOWER,
4255                                         adev->doorbell_index.gfx_ring0);
4256         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4257
4258         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4259                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4260 }
4261
4262 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4263 {
4264         struct amdgpu_ring *ring;
4265         u32 tmp;
4266         u32 rb_bufsz;
4267         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4268
4269         /* Set the write pointer delay */
4270         WREG32(mmCP_RB_WPTR_DELAY, 0);
4271
4272         /* set the RB to use vmid 0 */
4273         WREG32(mmCP_RB_VMID, 0);
4274
4275         /* Set ring buffer size */
4276         ring = &adev->gfx.gfx_ring[0];
4277         rb_bufsz = order_base_2(ring->ring_size / 8);
4278         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4279         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4280         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4281         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4282 #ifdef __BIG_ENDIAN
4283         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4284 #endif
4285         WREG32(mmCP_RB0_CNTL, tmp);
4286
4287         /* Initialize the ring buffer's read and write pointers */
4288         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4289         ring->wptr = 0;
4290         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4291
4292         /* set the wb address wether it's enabled or not */
4293         rptr_addr = ring->rptr_gpu_addr;
4294         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4295         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4296
4297         wptr_gpu_addr = ring->wptr_gpu_addr;
4298         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4299         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4300         mdelay(1);
4301         WREG32(mmCP_RB0_CNTL, tmp);
4302
4303         rb_addr = ring->gpu_addr >> 8;
4304         WREG32(mmCP_RB0_BASE, rb_addr);
4305         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4306
4307         gfx_v8_0_set_cpg_door_bell(adev, ring);
4308         /* start the ring */
4309         amdgpu_ring_clear_ring(ring);
4310         gfx_v8_0_cp_gfx_start(adev);
4311         ring->sched.ready = true;
4312
4313         return 0;
4314 }
4315
4316 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4317 {
4318         if (enable) {
4319                 WREG32(mmCP_MEC_CNTL, 0);
4320         } else {
4321                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4322                 adev->gfx.kiq.ring.sched.ready = false;
4323         }
4324         udelay(50);
4325 }
4326
4327 /* KIQ functions */
4328 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329 {
4330         uint32_t tmp;
4331         struct amdgpu_device *adev = ring->adev;
4332
4333         /* tell RLC which is KIQ queue */
4334         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335         tmp &= 0xffffff00;
4336         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338         tmp |= 0x80;
4339         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340 }
4341
4342 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343 {
4344         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345         uint64_t queue_mask = 0;
4346         int r, i;
4347
4348         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350                         continue;
4351
4352                 /* This situation may be hit in the future if a new HW
4353                  * generation exposes more than 64 queues. If so, the
4354                  * definition of queue_mask needs updating */
4355                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357                         break;
4358                 }
4359
4360                 queue_mask |= (1ull << i);
4361         }
4362
4363         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364         if (r) {
4365                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366                 return r;
4367         }
4368         /* set resources */
4369         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4371         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4372         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4373         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4374         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4375         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4376         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4377         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380                 uint64_t wptr_addr = ring->wptr_gpu_addr;
4381
4382                 /* map queues */
4383                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4385                 amdgpu_ring_write(kiq_ring,
4386                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387                 amdgpu_ring_write(kiq_ring,
4388                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396         }
4397
4398         amdgpu_ring_commit(kiq_ring);
4399
4400         return 0;
4401 }
4402
4403 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404 {
4405         int i, r = 0;
4406
4407         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409                 for (i = 0; i < adev->usec_timeout; i++) {
4410                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411                                 break;
4412                         udelay(1);
4413                 }
4414                 if (i == adev->usec_timeout)
4415                         r = -ETIMEDOUT;
4416         }
4417         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418         WREG32(mmCP_HQD_PQ_RPTR, 0);
4419         WREG32(mmCP_HQD_PQ_WPTR, 0);
4420
4421         return r;
4422 }
4423
4424 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4425 {
4426         struct amdgpu_device *adev = ring->adev;
4427
4428         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4429                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4430                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4431                         mqd->cp_hqd_queue_priority =
4432                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4433                 }
4434         }
4435 }
4436
4437 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4438 {
4439         struct amdgpu_device *adev = ring->adev;
4440         struct vi_mqd *mqd = ring->mqd_ptr;
4441         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4442         uint32_t tmp;
4443
4444         mqd->header = 0xC0310800;
4445         mqd->compute_pipelinestat_enable = 0x00000001;
4446         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4447         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4448         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4449         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4450         mqd->compute_misc_reserved = 0x00000003;
4451         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4452                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4453         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4454                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4455         eop_base_addr = ring->eop_gpu_addr >> 8;
4456         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4457         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4458
4459         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4460         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4461         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4462                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4463
4464         mqd->cp_hqd_eop_control = tmp;
4465
4466         /* enable doorbell? */
4467         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4468                             CP_HQD_PQ_DOORBELL_CONTROL,
4469                             DOORBELL_EN,
4470                             ring->use_doorbell ? 1 : 0);
4471
4472         mqd->cp_hqd_pq_doorbell_control = tmp;
4473
4474         /* set the pointer to the MQD */
4475         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4476         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4477
4478         /* set MQD vmid to 0 */
4479         tmp = RREG32(mmCP_MQD_CONTROL);
4480         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4481         mqd->cp_mqd_control = tmp;
4482
4483         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4484         hqd_gpu_addr = ring->gpu_addr >> 8;
4485         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4486         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4487
4488         /* set up the HQD, this is similar to CP_RB0_CNTL */
4489         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4490         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4491                             (order_base_2(ring->ring_size / 4) - 1));
4492         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4493                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4494 #ifdef __BIG_ENDIAN
4495         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4496 #endif
4497         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4498         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4499         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4500         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4501         mqd->cp_hqd_pq_control = tmp;
4502
4503         /* set the wb address whether it's enabled or not */
4504         wb_gpu_addr = ring->rptr_gpu_addr;
4505         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4506         mqd->cp_hqd_pq_rptr_report_addr_hi =
4507                 upper_32_bits(wb_gpu_addr) & 0xffff;
4508
4509         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4510         wb_gpu_addr = ring->wptr_gpu_addr;
4511         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4512         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4513
4514         tmp = 0;
4515         /* enable the doorbell if requested */
4516         if (ring->use_doorbell) {
4517                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4518                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4519                                 DOORBELL_OFFSET, ring->doorbell_index);
4520
4521                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4522                                          DOORBELL_EN, 1);
4523                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4524                                          DOORBELL_SOURCE, 0);
4525                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4526                                          DOORBELL_HIT, 0);
4527         }
4528
4529         mqd->cp_hqd_pq_doorbell_control = tmp;
4530
4531         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532         ring->wptr = 0;
4533         mqd->cp_hqd_pq_wptr = ring->wptr;
4534         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4535
4536         /* set the vmid for the queue */
4537         mqd->cp_hqd_vmid = 0;
4538
4539         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4540         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4541         mqd->cp_hqd_persistent_state = tmp;
4542
4543         /* set MTYPE */
4544         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4545         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4546         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4547         mqd->cp_hqd_ib_control = tmp;
4548
4549         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4550         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4551         mqd->cp_hqd_iq_timer = tmp;
4552
4553         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4554         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4555         mqd->cp_hqd_ctx_save_control = tmp;
4556
4557         /* defaults */
4558         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4559         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4560         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4561         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4562         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4563         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4564         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4565         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4566         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4567         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4568         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4569         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4570
4571         /* set static priority for a queue/ring */
4572         gfx_v8_0_mqd_set_priority(ring, mqd);
4573         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4574
4575         /* map_queues packet doesn't need activate the queue,
4576          * so only kiq need set this field.
4577          */
4578         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4579                 mqd->cp_hqd_active = 1;
4580
4581         return 0;
4582 }
4583
4584 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4585                         struct vi_mqd *mqd)
4586 {
4587         uint32_t mqd_reg;
4588         uint32_t *mqd_data;
4589
4590         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4591         mqd_data = &mqd->cp_mqd_base_addr_lo;
4592
4593         /* disable wptr polling */
4594         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4595
4596         /* program all HQD registers */
4597         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4598                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599
4600         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4601          * This is safe since EOP RPTR==WPTR for any inactive HQD
4602          * on ASICs that do not support context-save.
4603          * EOP writes/reads can start anywhere in the ring.
4604          */
4605         if (adev->asic_type != CHIP_TONGA) {
4606                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4607                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4608                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4609         }
4610
4611         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4612                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4613
4614         /* activate the HQD */
4615         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4616                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4617
4618         return 0;
4619 }
4620
4621 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4622 {
4623         struct amdgpu_device *adev = ring->adev;
4624         struct vi_mqd *mqd = ring->mqd_ptr;
4625         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4626
4627         gfx_v8_0_kiq_setting(ring);
4628
4629         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4630                 /* reset MQD to a clean status */
4631                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4632                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4633
4634                 /* reset ring buffer */
4635                 ring->wptr = 0;
4636                 amdgpu_ring_clear_ring(ring);
4637                 mutex_lock(&adev->srbm_mutex);
4638                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4639                 gfx_v8_0_mqd_commit(adev, mqd);
4640                 vi_srbm_select(adev, 0, 0, 0, 0);
4641                 mutex_unlock(&adev->srbm_mutex);
4642         } else {
4643                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4644                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4645                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4646                 mutex_lock(&adev->srbm_mutex);
4647                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4648                 gfx_v8_0_mqd_init(ring);
4649                 gfx_v8_0_mqd_commit(adev, mqd);
4650                 vi_srbm_select(adev, 0, 0, 0, 0);
4651                 mutex_unlock(&adev->srbm_mutex);
4652
4653                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4654                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4655         }
4656
4657         return 0;
4658 }
4659
4660 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4661 {
4662         struct amdgpu_device *adev = ring->adev;
4663         struct vi_mqd *mqd = ring->mqd_ptr;
4664         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4665
4666         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4667                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4668                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4669                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4670                 mutex_lock(&adev->srbm_mutex);
4671                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4672                 gfx_v8_0_mqd_init(ring);
4673                 vi_srbm_select(adev, 0, 0, 0, 0);
4674                 mutex_unlock(&adev->srbm_mutex);
4675
4676                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4677                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4678         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4679                 /* reset MQD to a clean status */
4680                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4681                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4682                 /* reset ring buffer */
4683                 ring->wptr = 0;
4684                 amdgpu_ring_clear_ring(ring);
4685         } else {
4686                 amdgpu_ring_clear_ring(ring);
4687         }
4688         return 0;
4689 }
4690
4691 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4692 {
4693         if (adev->asic_type > CHIP_TONGA) {
4694                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4695                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4696         }
4697         /* enable doorbells */
4698         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4699 }
4700
4701 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4702 {
4703         struct amdgpu_ring *ring;
4704         int r;
4705
4706         ring = &adev->gfx.kiq.ring;
4707
4708         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4709         if (unlikely(r != 0))
4710                 return r;
4711
4712         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4713         if (unlikely(r != 0))
4714                 return r;
4715
4716         gfx_v8_0_kiq_init_queue(ring);
4717         amdgpu_bo_kunmap(ring->mqd_obj);
4718         ring->mqd_ptr = NULL;
4719         amdgpu_bo_unreserve(ring->mqd_obj);
4720         ring->sched.ready = true;
4721         return 0;
4722 }
4723
4724 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4725 {
4726         struct amdgpu_ring *ring = NULL;
4727         int r = 0, i;
4728
4729         gfx_v8_0_cp_compute_enable(adev, true);
4730
4731         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4732                 ring = &adev->gfx.compute_ring[i];
4733
4734                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4735                 if (unlikely(r != 0))
4736                         goto done;
4737                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4738                 if (!r) {
4739                         r = gfx_v8_0_kcq_init_queue(ring);
4740                         amdgpu_bo_kunmap(ring->mqd_obj);
4741                         ring->mqd_ptr = NULL;
4742                 }
4743                 amdgpu_bo_unreserve(ring->mqd_obj);
4744                 if (r)
4745                         goto done;
4746         }
4747
4748         gfx_v8_0_set_mec_doorbell_range(adev);
4749
4750         r = gfx_v8_0_kiq_kcq_enable(adev);
4751         if (r)
4752                 goto done;
4753
4754 done:
4755         return r;
4756 }
4757
4758 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4759 {
4760         int r, i;
4761         struct amdgpu_ring *ring;
4762
4763         /* collect all the ring_tests here, gfx, kiq, compute */
4764         ring = &adev->gfx.gfx_ring[0];
4765         r = amdgpu_ring_test_helper(ring);
4766         if (r)
4767                 return r;
4768
4769         ring = &adev->gfx.kiq.ring;
4770         r = amdgpu_ring_test_helper(ring);
4771         if (r)
4772                 return r;
4773
4774         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4775                 ring = &adev->gfx.compute_ring[i];
4776                 amdgpu_ring_test_helper(ring);
4777         }
4778
4779         return 0;
4780 }
4781
4782 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4783 {
4784         int r;
4785
4786         if (!(adev->flags & AMD_IS_APU))
4787                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4788
4789         r = gfx_v8_0_kiq_resume(adev);
4790         if (r)
4791                 return r;
4792
4793         r = gfx_v8_0_cp_gfx_resume(adev);
4794         if (r)
4795                 return r;
4796
4797         r = gfx_v8_0_kcq_resume(adev);
4798         if (r)
4799                 return r;
4800
4801         r = gfx_v8_0_cp_test_all_rings(adev);
4802         if (r)
4803                 return r;
4804
4805         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4806
4807         return 0;
4808 }
4809
4810 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4811 {
4812         gfx_v8_0_cp_gfx_enable(adev, enable);
4813         gfx_v8_0_cp_compute_enable(adev, enable);
4814 }
4815
4816 static int gfx_v8_0_hw_init(void *handle)
4817 {
4818         int r;
4819         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4820
4821         gfx_v8_0_init_golden_registers(adev);
4822         gfx_v8_0_constants_init(adev);
4823
4824         r = adev->gfx.rlc.funcs->resume(adev);
4825         if (r)
4826                 return r;
4827
4828         r = gfx_v8_0_cp_resume(adev);
4829
4830         return r;
4831 }
4832
4833 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4834 {
4835         int r, i;
4836         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4837
4838         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4839         if (r)
4840                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4841
4842         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4843                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4844
4845                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4846                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4847                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4848                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4849                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4850                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4851                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4852                 amdgpu_ring_write(kiq_ring, 0);
4853                 amdgpu_ring_write(kiq_ring, 0);
4854                 amdgpu_ring_write(kiq_ring, 0);
4855         }
4856         r = amdgpu_ring_test_helper(kiq_ring);
4857         if (r)
4858                 DRM_ERROR("KCQ disable failed\n");
4859
4860         return r;
4861 }
4862
4863 static bool gfx_v8_0_is_idle(void *handle)
4864 {
4865         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4866
4867         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4868                 || RREG32(mmGRBM_STATUS2) != 0x8)
4869                 return false;
4870         else
4871                 return true;
4872 }
4873
4874 static bool gfx_v8_0_rlc_is_idle(void *handle)
4875 {
4876         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4877
4878         if (RREG32(mmGRBM_STATUS2) != 0x8)
4879                 return false;
4880         else
4881                 return true;
4882 }
4883
4884 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4885 {
4886         unsigned int i;
4887         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888
4889         for (i = 0; i < adev->usec_timeout; i++) {
4890                 if (gfx_v8_0_rlc_is_idle(handle))
4891                         return 0;
4892
4893                 udelay(1);
4894         }
4895         return -ETIMEDOUT;
4896 }
4897
4898 static int gfx_v8_0_wait_for_idle(void *handle)
4899 {
4900         unsigned int i;
4901         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902
4903         for (i = 0; i < adev->usec_timeout; i++) {
4904                 if (gfx_v8_0_is_idle(handle))
4905                         return 0;
4906
4907                 udelay(1);
4908         }
4909         return -ETIMEDOUT;
4910 }
4911
4912 static int gfx_v8_0_hw_fini(void *handle)
4913 {
4914         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4915
4916         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4917         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4918
4919         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4920
4921         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4922
4923         /* disable KCQ to avoid CPC touch memory not valid anymore */
4924         gfx_v8_0_kcq_disable(adev);
4925
4926         if (amdgpu_sriov_vf(adev)) {
4927                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4928                 return 0;
4929         }
4930         amdgpu_gfx_rlc_enter_safe_mode(adev);
4931         if (!gfx_v8_0_wait_for_idle(adev))
4932                 gfx_v8_0_cp_enable(adev, false);
4933         else
4934                 pr_err("cp is busy, skip halt cp\n");
4935         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4936                 adev->gfx.rlc.funcs->stop(adev);
4937         else
4938                 pr_err("rlc is busy, skip halt rlc\n");
4939         amdgpu_gfx_rlc_exit_safe_mode(adev);
4940
4941         return 0;
4942 }
4943
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4948
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4953
4954 static bool gfx_v8_0_check_soft_reset(void *handle)
4955 {
4956         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4957         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4958         u32 tmp;
4959
4960         /* GRBM_STATUS */
4961         tmp = RREG32(mmGRBM_STATUS);
4962         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4963                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4964                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4965                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4966                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4967                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4968                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4969                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4970                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4971                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4972                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4973                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4974                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4975         }
4976
4977         /* GRBM_STATUS2 */
4978         tmp = RREG32(mmGRBM_STATUS2);
4979         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4980                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4981                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4982
4983         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4984             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4985             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4986                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4987                                                 SOFT_RESET_CPF, 1);
4988                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4989                                                 SOFT_RESET_CPC, 1);
4990                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4991                                                 SOFT_RESET_CPG, 1);
4992                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4993                                                 SOFT_RESET_GRBM, 1);
4994         }
4995
4996         /* SRBM_STATUS */
4997         tmp = RREG32(mmSRBM_STATUS);
4998         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4999                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5000                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5001         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5002                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5004
5005         if (grbm_soft_reset || srbm_soft_reset) {
5006                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5007                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5008                 return true;
5009         } else {
5010                 adev->gfx.grbm_soft_reset = 0;
5011                 adev->gfx.srbm_soft_reset = 0;
5012                 return false;
5013         }
5014 }
5015
5016 static int gfx_v8_0_pre_soft_reset(void *handle)
5017 {
5018         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5019         u32 grbm_soft_reset = 0;
5020
5021         if ((!adev->gfx.grbm_soft_reset) &&
5022             (!adev->gfx.srbm_soft_reset))
5023                 return 0;
5024
5025         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5026
5027         /* stop the rlc */
5028         adev->gfx.rlc.funcs->stop(adev);
5029
5030         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5031             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5032                 /* Disable GFX parsing/prefetching */
5033                 gfx_v8_0_cp_gfx_enable(adev, false);
5034
5035         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5036             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5037             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5038             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5039                 int i;
5040
5041                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5042                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5043
5044                         mutex_lock(&adev->srbm_mutex);
5045                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5046                         gfx_v8_0_deactivate_hqd(adev, 2);
5047                         vi_srbm_select(adev, 0, 0, 0, 0);
5048                         mutex_unlock(&adev->srbm_mutex);
5049                 }
5050                 /* Disable MEC parsing/prefetching */
5051                 gfx_v8_0_cp_compute_enable(adev, false);
5052         }
5053
5054         return 0;
5055 }
5056
5057 static int gfx_v8_0_soft_reset(void *handle)
5058 {
5059         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5061         u32 tmp;
5062
5063         if ((!adev->gfx.grbm_soft_reset) &&
5064             (!adev->gfx.srbm_soft_reset))
5065                 return 0;
5066
5067         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5068         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5069
5070         if (grbm_soft_reset || srbm_soft_reset) {
5071                 tmp = RREG32(mmGMCON_DEBUG);
5072                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5073                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5074                 WREG32(mmGMCON_DEBUG, tmp);
5075                 udelay(50);
5076         }
5077
5078         if (grbm_soft_reset) {
5079                 tmp = RREG32(mmGRBM_SOFT_RESET);
5080                 tmp |= grbm_soft_reset;
5081                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5082                 WREG32(mmGRBM_SOFT_RESET, tmp);
5083                 tmp = RREG32(mmGRBM_SOFT_RESET);
5084
5085                 udelay(50);
5086
5087                 tmp &= ~grbm_soft_reset;
5088                 WREG32(mmGRBM_SOFT_RESET, tmp);
5089                 tmp = RREG32(mmGRBM_SOFT_RESET);
5090         }
5091
5092         if (srbm_soft_reset) {
5093                 tmp = RREG32(mmSRBM_SOFT_RESET);
5094                 tmp |= srbm_soft_reset;
5095                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5096                 WREG32(mmSRBM_SOFT_RESET, tmp);
5097                 tmp = RREG32(mmSRBM_SOFT_RESET);
5098
5099                 udelay(50);
5100
5101                 tmp &= ~srbm_soft_reset;
5102                 WREG32(mmSRBM_SOFT_RESET, tmp);
5103                 tmp = RREG32(mmSRBM_SOFT_RESET);
5104         }
5105
5106         if (grbm_soft_reset || srbm_soft_reset) {
5107                 tmp = RREG32(mmGMCON_DEBUG);
5108                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5109                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5110                 WREG32(mmGMCON_DEBUG, tmp);
5111         }
5112
5113         /* Wait a little for things to settle down */
5114         udelay(50);
5115
5116         return 0;
5117 }
5118
5119 static int gfx_v8_0_post_soft_reset(void *handle)
5120 {
5121         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5122         u32 grbm_soft_reset = 0;
5123
5124         if ((!adev->gfx.grbm_soft_reset) &&
5125             (!adev->gfx.srbm_soft_reset))
5126                 return 0;
5127
5128         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5129
5130         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5131             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5132             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5133             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5134                 int i;
5135
5136                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5137                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5138
5139                         mutex_lock(&adev->srbm_mutex);
5140                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5141                         gfx_v8_0_deactivate_hqd(adev, 2);
5142                         vi_srbm_select(adev, 0, 0, 0, 0);
5143                         mutex_unlock(&adev->srbm_mutex);
5144                 }
5145                 gfx_v8_0_kiq_resume(adev);
5146                 gfx_v8_0_kcq_resume(adev);
5147         }
5148
5149         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5150             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5151                 gfx_v8_0_cp_gfx_resume(adev);
5152
5153         gfx_v8_0_cp_test_all_rings(adev);
5154
5155         adev->gfx.rlc.funcs->start(adev);
5156
5157         return 0;
5158 }
5159
5160 /**
5161  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5162  *
5163  * @adev: amdgpu_device pointer
5164  *
5165  * Fetches a GPU clock counter snapshot.
5166  * Returns the 64 bit clock counter snapshot.
5167  */
5168 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5169 {
5170         uint64_t clock;
5171
5172         mutex_lock(&adev->gfx.gpu_clock_mutex);
5173         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5174         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5175                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5176         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5177         return clock;
5178 }
5179
5180 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5181                                           uint32_t vmid,
5182                                           uint32_t gds_base, uint32_t gds_size,
5183                                           uint32_t gws_base, uint32_t gws_size,
5184                                           uint32_t oa_base, uint32_t oa_size)
5185 {
5186         /* GDS Base */
5187         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5188         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5189                                 WRITE_DATA_DST_SEL(0)));
5190         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5191         amdgpu_ring_write(ring, 0);
5192         amdgpu_ring_write(ring, gds_base);
5193
5194         /* GDS Size */
5195         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5196         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5197                                 WRITE_DATA_DST_SEL(0)));
5198         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5199         amdgpu_ring_write(ring, 0);
5200         amdgpu_ring_write(ring, gds_size);
5201
5202         /* GWS */
5203         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205                                 WRITE_DATA_DST_SEL(0)));
5206         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5207         amdgpu_ring_write(ring, 0);
5208         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5209
5210         /* OA */
5211         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213                                 WRITE_DATA_DST_SEL(0)));
5214         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5215         amdgpu_ring_write(ring, 0);
5216         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5217 }
5218
5219 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5220 {
5221         WREG32(mmSQ_IND_INDEX,
5222                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5223                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5224                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5225                 (SQ_IND_INDEX__FORCE_READ_MASK));
5226         return RREG32(mmSQ_IND_DATA);
5227 }
5228
5229 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5230                            uint32_t wave, uint32_t thread,
5231                            uint32_t regno, uint32_t num, uint32_t *out)
5232 {
5233         WREG32(mmSQ_IND_INDEX,
5234                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5235                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5236                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5237                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5238                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5239                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5240         while (num--)
5241                 *(out++) = RREG32(mmSQ_IND_DATA);
5242 }
5243
5244 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5245 {
5246         /* type 0 wave data */
5247         dst[(*no_fields)++] = 0;
5248         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5249         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5250         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5251         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5252         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5253         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5254         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5255         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5256         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5257         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5258         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5259         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5267 }
5268
5269 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5270                                      uint32_t wave, uint32_t start,
5271                                      uint32_t size, uint32_t *dst)
5272 {
5273         wave_read_regs(
5274                 adev, simd, wave, 0,
5275                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5276 }
5277
5278
5279 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5280         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5281         .select_se_sh = &gfx_v8_0_select_se_sh,
5282         .read_wave_data = &gfx_v8_0_read_wave_data,
5283         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5284         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5285 };
5286
5287 static int gfx_v8_0_early_init(void *handle)
5288 {
5289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5290
5291         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5292         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5293                                           AMDGPU_MAX_COMPUTE_RINGS);
5294         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5295         gfx_v8_0_set_ring_funcs(adev);
5296         gfx_v8_0_set_irq_funcs(adev);
5297         gfx_v8_0_set_gds_init(adev);
5298         gfx_v8_0_set_rlc_funcs(adev);
5299
5300         return 0;
5301 }
5302
5303 static int gfx_v8_0_late_init(void *handle)
5304 {
5305         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306         int r;
5307
5308         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5309         if (r)
5310                 return r;
5311
5312         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5313         if (r)
5314                 return r;
5315
5316         /* requires IBs so do in late init after IB pool is initialized */
5317         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5318         if (r)
5319                 return r;
5320
5321         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5322         if (r) {
5323                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5324                 return r;
5325         }
5326
5327         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5328         if (r) {
5329                 DRM_ERROR(
5330                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5331                         r);
5332                 return r;
5333         }
5334
5335         return 0;
5336 }
5337
5338 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5339                                                        bool enable)
5340 {
5341         if ((adev->asic_type == CHIP_POLARIS11) ||
5342             (adev->asic_type == CHIP_POLARIS12) ||
5343             (adev->asic_type == CHIP_VEGAM))
5344                 /* Send msg to SMU via Powerplay */
5345                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5346
5347         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5348 }
5349
5350 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5351                                                         bool enable)
5352 {
5353         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5354 }
5355
5356 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5357                 bool enable)
5358 {
5359         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5360 }
5361
5362 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5363                                           bool enable)
5364 {
5365         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5366 }
5367
5368 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5369                                                 bool enable)
5370 {
5371         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5372
5373         /* Read any GFX register to wake up GFX. */
5374         if (!enable)
5375                 RREG32(mmDB_RENDER_CONTROL);
5376 }
5377
5378 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5379                                           bool enable)
5380 {
5381         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5382                 cz_enable_gfx_cg_power_gating(adev, true);
5383                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5384                         cz_enable_gfx_pipeline_power_gating(adev, true);
5385         } else {
5386                 cz_enable_gfx_cg_power_gating(adev, false);
5387                 cz_enable_gfx_pipeline_power_gating(adev, false);
5388         }
5389 }
5390
5391 static int gfx_v8_0_set_powergating_state(void *handle,
5392                                           enum amd_powergating_state state)
5393 {
5394         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5395         bool enable = (state == AMD_PG_STATE_GATE);
5396
5397         if (amdgpu_sriov_vf(adev))
5398                 return 0;
5399
5400         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5401                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5402                                 AMD_PG_SUPPORT_CP |
5403                                 AMD_PG_SUPPORT_GFX_DMG))
5404                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5405         switch (adev->asic_type) {
5406         case CHIP_CARRIZO:
5407         case CHIP_STONEY:
5408
5409                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5410                         cz_enable_sck_slow_down_on_power_up(adev, true);
5411                         cz_enable_sck_slow_down_on_power_down(adev, true);
5412                 } else {
5413                         cz_enable_sck_slow_down_on_power_up(adev, false);
5414                         cz_enable_sck_slow_down_on_power_down(adev, false);
5415                 }
5416                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5417                         cz_enable_cp_power_gating(adev, true);
5418                 else
5419                         cz_enable_cp_power_gating(adev, false);
5420
5421                 cz_update_gfx_cg_power_gating(adev, enable);
5422
5423                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5424                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5425                 else
5426                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5427
5428                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5429                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5430                 else
5431                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5432                 break;
5433         case CHIP_POLARIS11:
5434         case CHIP_POLARIS12:
5435         case CHIP_VEGAM:
5436                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5437                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5438                 else
5439                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5440
5441                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5442                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5443                 else
5444                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5445
5446                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5447                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5448                 else
5449                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5450                 break;
5451         default:
5452                 break;
5453         }
5454         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5455                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5456                                 AMD_PG_SUPPORT_CP |
5457                                 AMD_PG_SUPPORT_GFX_DMG))
5458                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5459         return 0;
5460 }
5461
5462 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5463 {
5464         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5465         int data;
5466
5467         if (amdgpu_sriov_vf(adev))
5468                 *flags = 0;
5469
5470         /* AMD_CG_SUPPORT_GFX_MGCG */
5471         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5472         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5473                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5474
5475         /* AMD_CG_SUPPORT_GFX_CGLG */
5476         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5477         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5478                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5479
5480         /* AMD_CG_SUPPORT_GFX_CGLS */
5481         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5482                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5483
5484         /* AMD_CG_SUPPORT_GFX_CGTS */
5485         data = RREG32(mmCGTS_SM_CTRL_REG);
5486         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5487                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5488
5489         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5490         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5491                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5492
5493         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5494         data = RREG32(mmRLC_MEM_SLP_CNTL);
5495         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5496                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5497
5498         /* AMD_CG_SUPPORT_GFX_CP_LS */
5499         data = RREG32(mmCP_MEM_SLP_CNTL);
5500         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5501                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5502 }
5503
5504 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5505                                      uint32_t reg_addr, uint32_t cmd)
5506 {
5507         uint32_t data;
5508
5509         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5510
5511         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5512         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5513
5514         data = RREG32(mmRLC_SERDES_WR_CTRL);
5515         if (adev->asic_type == CHIP_STONEY)
5516                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5517                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5518                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5519                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5520                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5521                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5522                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5523                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5524                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5525         else
5526                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5527                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5528                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5529                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5530                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5531                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5532                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5533                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5534                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5535                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5536                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5537         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5538                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5539                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5540                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5541
5542         WREG32(mmRLC_SERDES_WR_CTRL, data);
5543 }
5544
/*
 * Safe-mode message codes (enter = 1, exit = 0) and the bit layout of
 * RLC_GPR_REG2: REQ is bit 0, MESSAGE is bits 4:1 (mask 0x1e, shift 1).
 * NOTE(review): none of these are referenced by the safe-mode helpers that
 * immediately follow; confirm whether they are still used elsewhere.
 */
5545 #define MSG_ENTER_RLC_SAFE_MODE     1
5546 #define MSG_EXIT_RLC_SAFE_MODE      0
5547 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5548 #define RLC_GPR_REG2__REQ__SHIFT 0
5549 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5550 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5551
5552 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5553 {
5554         uint32_t rlc_setting;
5555
5556         rlc_setting = RREG32(mmRLC_CNTL);
5557         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5558                 return false;
5559
5560         return true;
5561 }
5562
5563 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5564 {
5565         uint32_t data;
5566         unsigned i;
5567         data = RREG32(mmRLC_CNTL);
5568         data |= RLC_SAFE_MODE__CMD_MASK;
5569         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5570         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5571         WREG32(mmRLC_SAFE_MODE, data);
5572
5573         /* wait for RLC_SAFE_MODE */
5574         for (i = 0; i < adev->usec_timeout; i++) {
5575                 if ((RREG32(mmRLC_GPM_STAT) &
5576                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5577                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5578                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5580                         break;
5581                 udelay(1);
5582         }
5583         for (i = 0; i < adev->usec_timeout; i++) {
5584                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5585                         break;
5586                 udelay(1);
5587         }
5588 }
5589
5590 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5591 {
5592         uint32_t data;
5593         unsigned i;
5594
5595         data = RREG32(mmRLC_CNTL);
5596         data |= RLC_SAFE_MODE__CMD_MASK;
5597         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5598         WREG32(mmRLC_SAFE_MODE, data);
5599
5600         for (i = 0; i < adev->usec_timeout; i++) {
5601                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5602                         break;
5603                 udelay(1);
5604         }
5605 }
5606
5607 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5608 {
5609         u32 data;
5610
5611         amdgpu_gfx_off_ctrl(adev, false);
5612
5613         if (amdgpu_sriov_is_pp_one_vf(adev))
5614                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5615         else
5616                 data = RREG32(mmRLC_SPM_VMID);
5617
5618         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5619         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5620
5621         if (amdgpu_sriov_is_pp_one_vf(adev))
5622                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5623         else
5624                 WREG32(mmRLC_SPM_VMID, data);
5625
5626         amdgpu_gfx_off_ctrl(adev, true);
5627 }
5628
5629 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5630         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5631         .set_safe_mode = gfx_v8_0_set_safe_mode,
5632         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5633         .init = gfx_v8_0_rlc_init,
5634         .get_csb_size = gfx_v8_0_get_csb_size,
5635         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5636         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5637         .resume = gfx_v8_0_rlc_resume,
5638         .stop = gfx_v8_0_rlc_stop,
5639         .reset = gfx_v8_0_rlc_reset,
5640         .start = gfx_v8_0_rlc_start,
5641         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5642 };
5643
5644 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5645                                                       bool enable)
5646 {
5647         uint32_t temp, data;
5648
5649         amdgpu_gfx_rlc_enter_safe_mode(adev);
5650
5651         /* It is disabled by HW by default */
5652         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5653                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5654                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5655                                 /* 1 - RLC memory Light sleep */
5656                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5657
5658                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5659                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5660                 }
5661
5662                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5663                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664                 if (adev->flags & AMD_IS_APU)
5665                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5666                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5667                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5668                 else
5669                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5670                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5671                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5672                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5673
5674                 if (temp != data)
5675                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5676
5677                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5678                 gfx_v8_0_wait_for_rlc_serdes(adev);
5679
5680                 /* 5 - clear mgcg override */
5681                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5682
5683                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5684                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5685                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5686                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5687                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5688                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5689                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5690                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5691                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5692                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5693                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5694                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5695                         if (temp != data)
5696                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5697                 }
5698                 udelay(50);
5699
5700                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701                 gfx_v8_0_wait_for_rlc_serdes(adev);
5702         } else {
5703                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5704                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5705                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5706                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5707                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5708                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5709                 if (temp != data)
5710                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5711
5712                 /* 2 - disable MGLS in RLC */
5713                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5714                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5715                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5716                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5717                 }
5718
5719                 /* 3 - disable MGLS in CP */
5720                 data = RREG32(mmCP_MEM_SLP_CNTL);
5721                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5722                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5723                         WREG32(mmCP_MEM_SLP_CNTL, data);
5724                 }
5725
5726                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5727                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5728                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5729                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5730                 if (temp != data)
5731                         WREG32(mmCGTS_SM_CTRL_REG, data);
5732
5733                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5734                 gfx_v8_0_wait_for_rlc_serdes(adev);
5735
5736                 /* 6 - set mgcg override */
5737                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5738
5739                 udelay(50);
5740
5741                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5742                 gfx_v8_0_wait_for_rlc_serdes(adev);
5743         }
5744
5745         amdgpu_gfx_rlc_exit_safe_mode(adev);
5746 }
5747
5748 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5749                                                       bool enable)
5750 {
5751         uint32_t temp, temp1, data, data1;
5752
5753         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5754
5755         amdgpu_gfx_rlc_enter_safe_mode(adev);
5756
5757         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5758                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5759                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5760                 if (temp1 != data1)
5761                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5762
5763                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5764                 gfx_v8_0_wait_for_rlc_serdes(adev);
5765
5766                 /* 2 - clear cgcg override */
5767                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5768
5769                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5770                 gfx_v8_0_wait_for_rlc_serdes(adev);
5771
5772                 /* 3 - write cmd to set CGLS */
5773                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5774
5775                 /* 4 - enable cgcg */
5776                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5777
5778                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5779                         /* enable cgls*/
5780                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5781
5782                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5784
5785                         if (temp1 != data1)
5786                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787                 } else {
5788                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5789                 }
5790
5791                 if (temp != data)
5792                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5793
5794                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5795                  * Cmp_busy/GFX_Idle interrupts
5796                  */
5797                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5798         } else {
5799                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5800                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5801
5802                 /* TEST CGCG */
5803                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5804                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5805                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5806                 if (temp1 != data1)
5807                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5808
5809                 /* read gfx register to wake up cgcg */
5810                 RREG32(mmCB_CGTT_SCLK_CTRL);
5811                 RREG32(mmCB_CGTT_SCLK_CTRL);
5812                 RREG32(mmCB_CGTT_SCLK_CTRL);
5813                 RREG32(mmCB_CGTT_SCLK_CTRL);
5814
5815                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5816                 gfx_v8_0_wait_for_rlc_serdes(adev);
5817
5818                 /* write cmd to Set CGCG Override */
5819                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5820
5821                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5822                 gfx_v8_0_wait_for_rlc_serdes(adev);
5823
5824                 /* write cmd to Clear CGLS */
5825                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5826
5827                 /* disable cgcg, cgls should be disabled too. */
5828                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5829                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5830                 if (temp != data)
5831                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5832                 /* enable interrupts again for PG */
5833                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5834         }
5835
5836         gfx_v8_0_wait_for_rlc_serdes(adev);
5837
5838         amdgpu_gfx_rlc_exit_safe_mode(adev);
5839 }
/*
 * gfx_v8_0_update_gfx_clock_gating - apply medium- and coarse-grain gating
 * in the required order.
 *
 * Enabling: MGCG/MGLS/TS first, then CGCG/CGLS.
 * Disabling: CGCG/CGLS first, then MGCG/MGLS/TS.
 *
 * Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (!enable) {
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		return 0;
	}

	gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	return 0;
}
5858
5859 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5860                                           enum amd_clockgating_state state)
5861 {
5862         uint32_t msg_id, pp_state = 0;
5863         uint32_t pp_support_state = 0;
5864
5865         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5866                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5867                         pp_support_state = PP_STATE_SUPPORT_LS;
5868                         pp_state = PP_STATE_LS;
5869                 }
5870                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5871                         pp_support_state |= PP_STATE_SUPPORT_CG;
5872                         pp_state |= PP_STATE_CG;
5873                 }
5874                 if (state == AMD_CG_STATE_UNGATE)
5875                         pp_state = 0;
5876
5877                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878                                 PP_BLOCK_GFX_CG,
5879                                 pp_support_state,
5880                                 pp_state);
5881                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882         }
5883
5884         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5885                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5886                         pp_support_state = PP_STATE_SUPPORT_LS;
5887                         pp_state = PP_STATE_LS;
5888                 }
5889
5890                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5891                         pp_support_state |= PP_STATE_SUPPORT_CG;
5892                         pp_state |= PP_STATE_CG;
5893                 }
5894
5895                 if (state == AMD_CG_STATE_UNGATE)
5896                         pp_state = 0;
5897
5898                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5899                                 PP_BLOCK_GFX_MG,
5900                                 pp_support_state,
5901                                 pp_state);
5902                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903         }
5904
5905         return 0;
5906 }
5907
5908 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5909                                           enum amd_clockgating_state state)
5910 {
5911
5912         uint32_t msg_id, pp_state = 0;
5913         uint32_t pp_support_state = 0;
5914
5915         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5916                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5917                         pp_support_state = PP_STATE_SUPPORT_LS;
5918                         pp_state = PP_STATE_LS;
5919                 }
5920                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5921                         pp_support_state |= PP_STATE_SUPPORT_CG;
5922                         pp_state |= PP_STATE_CG;
5923                 }
5924                 if (state == AMD_CG_STATE_UNGATE)
5925                         pp_state = 0;
5926
5927                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5928                                 PP_BLOCK_GFX_CG,
5929                                 pp_support_state,
5930                                 pp_state);
5931                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5932         }
5933
5934         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5935                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5936                         pp_support_state = PP_STATE_SUPPORT_LS;
5937                         pp_state = PP_STATE_LS;
5938                 }
5939                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5940                         pp_support_state |= PP_STATE_SUPPORT_CG;
5941                         pp_state |= PP_STATE_CG;
5942                 }
5943                 if (state == AMD_CG_STATE_UNGATE)
5944                         pp_state = 0;
5945
5946                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947                                 PP_BLOCK_GFX_3D,
5948                                 pp_support_state,
5949                                 pp_state);
5950                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951         }
5952
5953         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5954                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5955                         pp_support_state = PP_STATE_SUPPORT_LS;
5956                         pp_state = PP_STATE_LS;
5957                 }
5958
5959                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5960                         pp_support_state |= PP_STATE_SUPPORT_CG;
5961                         pp_state |= PP_STATE_CG;
5962                 }
5963
5964                 if (state == AMD_CG_STATE_UNGATE)
5965                         pp_state = 0;
5966
5967                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5968                                 PP_BLOCK_GFX_MG,
5969                                 pp_support_state,
5970                                 pp_state);
5971                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5972         }
5973
5974         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5975                 pp_support_state = PP_STATE_SUPPORT_LS;
5976
5977                 if (state == AMD_CG_STATE_UNGATE)
5978                         pp_state = 0;
5979                 else
5980                         pp_state = PP_STATE_LS;
5981
5982                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5983                                 PP_BLOCK_GFX_RLC,
5984                                 pp_support_state,
5985                                 pp_state);
5986                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5987         }
5988
5989         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5990                 pp_support_state = PP_STATE_SUPPORT_LS;
5991
5992                 if (state == AMD_CG_STATE_UNGATE)
5993                         pp_state = 0;
5994                 else
5995                         pp_state = PP_STATE_LS;
5996                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5997                         PP_BLOCK_GFX_CP,
5998                         pp_support_state,
5999                         pp_state);
6000                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6001         }
6002
6003         return 0;
6004 }
6005
6006 static int gfx_v8_0_set_clockgating_state(void *handle,
6007                                           enum amd_clockgating_state state)
6008 {
6009         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6010
6011         if (amdgpu_sriov_vf(adev))
6012                 return 0;
6013
6014         switch (adev->asic_type) {
6015         case CHIP_FIJI:
6016         case CHIP_CARRIZO:
6017         case CHIP_STONEY:
6018                 gfx_v8_0_update_gfx_clock_gating(adev,
6019                                                  state == AMD_CG_STATE_GATE);
6020                 break;
6021         case CHIP_TONGA:
6022                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6023                 break;
6024         case CHIP_POLARIS10:
6025         case CHIP_POLARIS11:
6026         case CHIP_POLARIS12:
6027         case CHIP_VEGAM:
6028                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6029                 break;
6030         default:
6031                 break;
6032         }
6033         return 0;
6034 }
6035
6036 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6037 {
6038         return *ring->rptr_cpu_addr;
6039 }
6040
6041 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6042 {
6043         struct amdgpu_device *adev = ring->adev;
6044
6045         if (ring->use_doorbell)
6046                 /* XXX check if swapping is necessary on BE */
6047                 return *ring->wptr_cpu_addr;
6048         else
6049                 return RREG32(mmCP_RB0_WPTR);
6050 }
6051
6052 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6053 {
6054         struct amdgpu_device *adev = ring->adev;
6055
6056         if (ring->use_doorbell) {
6057                 /* XXX check if swapping is necessary on BE */
6058                 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6059                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6060         } else {
6061                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6062                 (void)RREG32(mmCP_RB0_WPTR);
6063         }
6064 }
6065
6066 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6067 {
6068         u32 ref_and_mask, reg_mem_engine;
6069
6070         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6071             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6072                 switch (ring->me) {
6073                 case 1:
6074                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6075                         break;
6076                 case 2:
6077                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6078                         break;
6079                 default:
6080                         return;
6081                 }
6082                 reg_mem_engine = 0;
6083         } else {
6084                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6085                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6086         }
6087
6088         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6089         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6090                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6091                                  reg_mem_engine));
6092         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6093         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6094         amdgpu_ring_write(ring, ref_and_mask);
6095         amdgpu_ring_write(ring, ref_and_mask);
6096         amdgpu_ring_write(ring, 0x20); /* poll interval */
6097 }
6098
6099 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6100 {
6101         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6102         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6103                 EVENT_INDEX(4));
6104
6105         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6106         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6107                 EVENT_INDEX(0));
6108 }
6109
6110 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6111                                         struct amdgpu_job *job,
6112                                         struct amdgpu_ib *ib,
6113                                         uint32_t flags)
6114 {
6115         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6116         u32 header, control = 0;
6117
6118         if (ib->flags & AMDGPU_IB_FLAG_CE)
6119                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6120         else
6121                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6122
6123         control |= ib->length_dw | (vmid << 24);
6124
6125         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6126                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6127
6128                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6129                         gfx_v8_0_ring_emit_de_meta(ring);
6130         }
6131
6132         amdgpu_ring_write(ring, header);
6133         amdgpu_ring_write(ring,
6134 #ifdef __BIG_ENDIAN
6135                           (2 << 0) |
6136 #endif
6137                           (ib->gpu_addr & 0xFFFFFFFC));
6138         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6139         amdgpu_ring_write(ring, control);
6140 }
6141
6142 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6143                                           struct amdgpu_job *job,
6144                                           struct amdgpu_ib *ib,
6145                                           uint32_t flags)
6146 {
6147         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6148         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6149
6150         /* Currently, there is a high possibility to get wave ID mismatch
6151          * between ME and GDS, leading to a hw deadlock, because ME generates
6152          * different wave IDs than the GDS expects. This situation happens
6153          * randomly when at least 5 compute pipes use GDS ordered append.
6154          * The wave IDs generated by ME are also wrong after suspend/resume.
6155          * Those are probably bugs somewhere else in the kernel driver.
6156          *
6157          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6158          * GDS to 0 for this ring (me/pipe).
6159          */
6160         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6161                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6162                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6163                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6164         }
6165
6166         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6167         amdgpu_ring_write(ring,
6168 #ifdef __BIG_ENDIAN
6169                                 (2 << 0) |
6170 #endif
6171                                 (ib->gpu_addr & 0xFFFFFFFC));
6172         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6173         amdgpu_ring_write(ring, control);
6174 }
6175
6176 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6177                                          u64 seq, unsigned flags)
6178 {
6179         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6180         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6181
6182         /* Workaround for cache flush problems. First send a dummy EOP
6183          * event down the pipe with seq one below.
6184          */
6185         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6186         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6187                                  EOP_TC_ACTION_EN |
6188                                  EOP_TC_WB_ACTION_EN |
6189                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6190                                  EVENT_INDEX(5)));
6191         amdgpu_ring_write(ring, addr & 0xfffffffc);
6192         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6193                                 DATA_SEL(1) | INT_SEL(0));
6194         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6195         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6196
6197         /* Then send the real EOP event down the pipe:
6198          * EVENT_WRITE_EOP - flush caches, send int */
6199         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6200         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6201                                  EOP_TC_ACTION_EN |
6202                                  EOP_TC_WB_ACTION_EN |
6203                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6204                                  EVENT_INDEX(5)));
6205         amdgpu_ring_write(ring, addr & 0xfffffffc);
6206         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6207                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6208         amdgpu_ring_write(ring, lower_32_bits(seq));
6209         amdgpu_ring_write(ring, upper_32_bits(seq));
6210
6211 }
6212
6213 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6214 {
6215         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6216         uint32_t seq = ring->fence_drv.sync_seq;
6217         uint64_t addr = ring->fence_drv.gpu_addr;
6218
6219         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6220         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6221                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6222                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6223         amdgpu_ring_write(ring, addr & 0xfffffffc);
6224         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6225         amdgpu_ring_write(ring, seq);
6226         amdgpu_ring_write(ring, 0xffffffff);
6227         amdgpu_ring_write(ring, 4); /* poll interval */
6228 }
6229
6230 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6231                                         unsigned vmid, uint64_t pd_addr)
6232 {
6233         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6234
6235         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6236
6237         /* wait for the invalidate to complete */
6238         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6240                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6241                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6242         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6243         amdgpu_ring_write(ring, 0);
6244         amdgpu_ring_write(ring, 0); /* ref */
6245         amdgpu_ring_write(ring, 0); /* mask */
6246         amdgpu_ring_write(ring, 0x20); /* poll interval */
6247
6248         /* compute doesn't have PFP */
6249         if (usepfp) {
6250                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6251                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6252                 amdgpu_ring_write(ring, 0x0);
6253         }
6254 }
6255
6256 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6257 {
6258         return *ring->wptr_cpu_addr;
6259 }
6260
6261 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6262 {
6263         struct amdgpu_device *adev = ring->adev;
6264
6265         /* XXX check if swapping is necessary on BE */
6266         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6267         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6268 }
6269
6270 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6271                                              u64 addr, u64 seq,
6272                                              unsigned flags)
6273 {
6274         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6275         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6276
6277         /* RELEASE_MEM - flush caches, send int */
6278         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6279         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6280                                  EOP_TC_ACTION_EN |
6281                                  EOP_TC_WB_ACTION_EN |
6282                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6283                                  EVENT_INDEX(5)));
6284         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6285         amdgpu_ring_write(ring, addr & 0xfffffffc);
6286         amdgpu_ring_write(ring, upper_32_bits(addr));
6287         amdgpu_ring_write(ring, lower_32_bits(seq));
6288         amdgpu_ring_write(ring, upper_32_bits(seq));
6289 }
6290
6291 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6292                                          u64 seq, unsigned int flags)
6293 {
6294         /* we only allocate 32bit for each seq wb address */
6295         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6296
6297         /* write fence seq to the "addr" */
6298         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6299         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6300                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6301         amdgpu_ring_write(ring, lower_32_bits(addr));
6302         amdgpu_ring_write(ring, upper_32_bits(addr));
6303         amdgpu_ring_write(ring, lower_32_bits(seq));
6304
6305         if (flags & AMDGPU_FENCE_FLAG_INT) {
6306                 /* set register to trigger INT */
6307                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6308                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6309                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6310                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6311                 amdgpu_ring_write(ring, 0);
6312                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6313         }
6314 }
6315
6316 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6317 {
6318         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6319         amdgpu_ring_write(ring, 0);
6320 }
6321
6322 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6323 {
6324         uint32_t dw2 = 0;
6325
6326         if (amdgpu_sriov_vf(ring->adev))
6327                 gfx_v8_0_ring_emit_ce_meta(ring);
6328
6329         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6330         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6331                 gfx_v8_0_ring_emit_vgt_flush(ring);
6332                 /* set load_global_config & load_global_uconfig */
6333                 dw2 |= 0x8001;
6334                 /* set load_cs_sh_regs */
6335                 dw2 |= 0x01000000;
6336                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6337                 dw2 |= 0x10002;
6338
6339                 /* set load_ce_ram if preamble presented */
6340                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6341                         dw2 |= 0x10000000;
6342         } else {
6343                 /* still load_ce_ram if this is the first time preamble presented
6344                  * although there is no context switch happens.
6345                  */
6346                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6347                         dw2 |= 0x10000000;
6348         }
6349
6350         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351         amdgpu_ring_write(ring, dw2);
6352         amdgpu_ring_write(ring, 0);
6353 }
6354
6355 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6356 {
6357         unsigned ret;
6358
6359         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6360         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6361         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6362         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6363         ret = ring->wptr & ring->buf_mask;
6364         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6365         return ret;
6366 }
6367
6368 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6369 {
6370         unsigned cur;
6371
6372         BUG_ON(offset > ring->buf_mask);
6373         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6374
6375         cur = (ring->wptr & ring->buf_mask) - 1;
6376         if (likely(cur > offset))
6377                 ring->ring[offset] = cur - offset;
6378         else
6379                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6380 }
6381
6382 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6383                                     uint32_t reg_val_offs)
6384 {
6385         struct amdgpu_device *adev = ring->adev;
6386
6387         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6388         amdgpu_ring_write(ring, 0 |     /* src: register*/
6389                                 (5 << 8) |      /* dst: memory */
6390                                 (1 << 20));     /* write confirm */
6391         amdgpu_ring_write(ring, reg);
6392         amdgpu_ring_write(ring, 0);
6393         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6394                                 reg_val_offs * 4));
6395         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6396                                 reg_val_offs * 4));
6397 }
6398
6399 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6400                                   uint32_t val)
6401 {
6402         uint32_t cmd;
6403
6404         switch (ring->funcs->type) {
6405         case AMDGPU_RING_TYPE_GFX:
6406                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6407                 break;
6408         case AMDGPU_RING_TYPE_KIQ:
6409                 cmd = 1 << 16; /* no inc addr */
6410                 break;
6411         default:
6412                 cmd = WR_CONFIRM;
6413                 break;
6414         }
6415
6416         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6417         amdgpu_ring_write(ring, cmd);
6418         amdgpu_ring_write(ring, reg);
6419         amdgpu_ring_write(ring, 0);
6420         amdgpu_ring_write(ring, val);
6421 }
6422
6423 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6424 {
6425         struct amdgpu_device *adev = ring->adev;
6426         uint32_t value = 0;
6427
6428         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6429         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6430         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6431         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6432         WREG32(mmSQ_CMD, value);
6433 }
6434
6435 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6436                                                  enum amdgpu_interrupt_state state)
6437 {
6438         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6439                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6440 }
6441
6442 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6443                                                      int me, int pipe,
6444                                                      enum amdgpu_interrupt_state state)
6445 {
6446         u32 mec_int_cntl, mec_int_cntl_reg;
6447
6448         /*
6449          * amdgpu controls only the first MEC. That's why this function only
6450          * handles the setting of interrupts for this specific MEC. All other
6451          * pipes' interrupts are set by amdkfd.
6452          */
6453
6454         if (me == 1) {
6455                 switch (pipe) {
6456                 case 0:
6457                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6458                         break;
6459                 case 1:
6460                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6461                         break;
6462                 case 2:
6463                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6464                         break;
6465                 case 3:
6466                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6467                         break;
6468                 default:
6469                         DRM_DEBUG("invalid pipe %d\n", pipe);
6470                         return;
6471                 }
6472         } else {
6473                 DRM_DEBUG("invalid me %d\n", me);
6474                 return;
6475         }
6476
6477         switch (state) {
6478         case AMDGPU_IRQ_STATE_DISABLE:
6479                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6480                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6481                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6482                 break;
6483         case AMDGPU_IRQ_STATE_ENABLE:
6484                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6485                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6486                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6487                 break;
6488         default:
6489                 break;
6490         }
6491 }
6492
6493 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6494                                              struct amdgpu_irq_src *source,
6495                                              unsigned type,
6496                                              enum amdgpu_interrupt_state state)
6497 {
6498         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6499                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6500
6501         return 0;
6502 }
6503
6504 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6505                                               struct amdgpu_irq_src *source,
6506                                               unsigned type,
6507                                               enum amdgpu_interrupt_state state)
6508 {
6509         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6510                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6511
6512         return 0;
6513 }
6514
6515 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6516                                             struct amdgpu_irq_src *src,
6517                                             unsigned type,
6518                                             enum amdgpu_interrupt_state state)
6519 {
6520         switch (type) {
6521         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6522                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6523                 break;
6524         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6525                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6526                 break;
6527         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6528                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6529                 break;
6530         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6531                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6532                 break;
6533         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6534                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6535                 break;
6536         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6537                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6538                 break;
6539         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6540                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6541                 break;
6542         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6543                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6544                 break;
6545         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6546                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6547                 break;
6548         default:
6549                 break;
6550         }
6551         return 0;
6552 }
6553
6554 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6555                                          struct amdgpu_irq_src *source,
6556                                          unsigned int type,
6557                                          enum amdgpu_interrupt_state state)
6558 {
6559         int enable_flag;
6560
6561         switch (state) {
6562         case AMDGPU_IRQ_STATE_DISABLE:
6563                 enable_flag = 0;
6564                 break;
6565
6566         case AMDGPU_IRQ_STATE_ENABLE:
6567                 enable_flag = 1;
6568                 break;
6569
6570         default:
6571                 return -EINVAL;
6572         }
6573
6574         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6575         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580                      enable_flag);
6581         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582                      enable_flag);
6583         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584                      enable_flag);
6585         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586                      enable_flag);
6587         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588                      enable_flag);
6589         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594                      enable_flag);
6595
6596         return 0;
6597 }
6598
6599 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6600                                      struct amdgpu_irq_src *source,
6601                                      unsigned int type,
6602                                      enum amdgpu_interrupt_state state)
6603 {
6604         int enable_flag;
6605
6606         switch (state) {
6607         case AMDGPU_IRQ_STATE_DISABLE:
6608                 enable_flag = 1;
6609                 break;
6610
6611         case AMDGPU_IRQ_STATE_ENABLE:
6612                 enable_flag = 0;
6613                 break;
6614
6615         default:
6616                 return -EINVAL;
6617         }
6618
6619         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6620                      enable_flag);
6621
6622         return 0;
6623 }
6624
6625 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6626                             struct amdgpu_irq_src *source,
6627                             struct amdgpu_iv_entry *entry)
6628 {
6629         int i;
6630         u8 me_id, pipe_id, queue_id;
6631         struct amdgpu_ring *ring;
6632
6633         DRM_DEBUG("IH: CP EOP\n");
6634         me_id = (entry->ring_id & 0x0c) >> 2;
6635         pipe_id = (entry->ring_id & 0x03) >> 0;
6636         queue_id = (entry->ring_id & 0x70) >> 4;
6637
6638         switch (me_id) {
6639         case 0:
6640                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6641                 break;
6642         case 1:
6643         case 2:
6644                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6645                         ring = &adev->gfx.compute_ring[i];
6646                         /* Per-queue interrupt is supported for MEC starting from VI.
6647                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6648                           */
6649                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6650                                 amdgpu_fence_process(ring);
6651                 }
6652                 break;
6653         }
6654         return 0;
6655 }
6656
6657 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6658                            struct amdgpu_iv_entry *entry)
6659 {
6660         u8 me_id, pipe_id, queue_id;
6661         struct amdgpu_ring *ring;
6662         int i;
6663
6664         me_id = (entry->ring_id & 0x0c) >> 2;
6665         pipe_id = (entry->ring_id & 0x03) >> 0;
6666         queue_id = (entry->ring_id & 0x70) >> 4;
6667
6668         switch (me_id) {
6669         case 0:
6670                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6671                 break;
6672         case 1:
6673         case 2:
6674                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6675                         ring = &adev->gfx.compute_ring[i];
6676                         if (ring->me == me_id && ring->pipe == pipe_id &&
6677                             ring->queue == queue_id)
6678                                 drm_sched_fault(&ring->sched);
6679                 }
6680                 break;
6681         }
6682 }
6683
/*
 * Privileged register fault interrupt: log the error and flag the fault on
 * the ring named by @entry.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");

        gfx_v8_0_fault(adev, entry);

        return 0;
}
6692
/*
 * Privileged instruction fault interrupt: log the error and flag the fault
 * on the ring named by @entry.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");

        gfx_v8_0_fault(adev, entry);

        return 0;
}
6701
/*
 * CP EDC/ECC error interrupt: only logs the event.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* terminate the message so it is not merged with the next log line */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6709
6710 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6711                                   bool from_wq)
6712 {
6713         u32 enc, se_id, sh_id, cu_id;
6714         char type[20];
6715         int sq_edc_source = -1;
6716
6717         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6718         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6719
6720         switch (enc) {
6721                 case 0:
6722                         DRM_INFO("SQ general purpose intr detected:"
6723                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6724                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6725                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6726                                         "wlt %d, thread_trace %d.\n",
6727                                         se_id,
6728                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6729                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6730                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6731                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6732                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6733                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6734                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6735                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6736                                         );
6737                         break;
6738                 case 1:
6739                 case 2:
6740
6741                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6742                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6743
6744                         /*
6745                          * This function can be called either directly from ISR
6746                          * or from BH in which case we can access SQ_EDC_INFO
6747                          * instance
6748                          */
6749                         if (from_wq) {
6750                                 mutex_lock(&adev->grbm_idx_mutex);
6751                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6752
6753                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6754
6755                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6756                                 mutex_unlock(&adev->grbm_idx_mutex);
6757                         }
6758
6759                         if (enc == 1)
6760                                 sprintf(type, "instruction intr");
6761                         else
6762                                 sprintf(type, "EDC/ECC error");
6763
6764                         DRM_INFO(
6765                                 "SQ %s detected: "
6766                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6767                                         "trap %s, sq_ed_info.source %s.\n",
6768                                         type, se_id, sh_id, cu_id,
6769                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6770                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6771                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6772                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6773                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6774                                 );
6775                         break;
6776                 default:
6777                         DRM_ERROR("SQ invalid encoding type\n.");
6778         }
6779 }
6780
6781 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6782 {
6783
6784         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6785         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6786
6787         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6788 }
6789
6790 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6791                            struct amdgpu_irq_src *source,
6792                            struct amdgpu_iv_entry *entry)
6793 {
6794         unsigned ih_data = entry->src_data[0];
6795
6796         /*
6797          * Try to submit work so SQ_EDC_INFO can be accessed from
6798          * BH. If previous work submission hasn't finished yet
6799          * just print whatever info is possible directly from the ISR.
6800          */
6801         if (work_pending(&adev->gfx.sq_work.work)) {
6802                 gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6803         } else {
6804                 adev->gfx.sq_work.ih_data = ih_data;
6805                 schedule_work(&adev->gfx.sq_work.work);
6806         }
6807
6808         return 0;
6809 }
6810
6811 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6812 {
6813         amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6814         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6815                           PACKET3_TC_ACTION_ENA |
6816                           PACKET3_SH_KCACHE_ACTION_ENA |
6817                           PACKET3_SH_ICACHE_ACTION_ENA |
6818                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6819         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6820         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6821         amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6822 }
6823
6824 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6825 {
6826         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6827         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6828                           PACKET3_TC_ACTION_ENA |
6829                           PACKET3_SH_KCACHE_ACTION_ENA |
6830                           PACKET3_SH_ICACHE_ACTION_ENA |
6831                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6832         amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6833         amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6834         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6835         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6836         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6837 }
6838
6839
6840 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6841 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6842 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6843                                         uint32_t pipe, bool enable)
6844 {
6845         uint32_t val;
6846         uint32_t wcl_cs_reg;
6847
6848         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6849
6850         switch (pipe) {
6851         case 0:
6852                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6853                 break;
6854         case 1:
6855                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6856                 break;
6857         case 2:
6858                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6859                 break;
6860         case 3:
6861                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6862                 break;
6863         default:
6864                 DRM_DEBUG("invalid pipe %d\n", pipe);
6865                 return;
6866         }
6867
6868         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6869
6870 }
6871
6872 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6873 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6874 {
6875         struct amdgpu_device *adev = ring->adev;
6876         uint32_t val;
6877         int i;
6878
6879         /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6880          * number of gfx waves. Setting 5 bit will make sure gfx only gets
6881          * around 25% of gpu resources.
6882          */
6883         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6884         amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6885
6886         /* Restrict waves for normal/low priority compute queues as well
6887          * to get best QoS for high priority compute jobs.
6888          *
6889          * amdgpu controls only 1st ME(0-3 CS pipes).
6890          */
6891         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6892                 if (i != ring->pipe)
6893                         gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6894
6895         }
6896
6897 }
6898
6899 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6900         .name = "gfx_v8_0",
6901         .early_init = gfx_v8_0_early_init,
6902         .late_init = gfx_v8_0_late_init,
6903         .sw_init = gfx_v8_0_sw_init,
6904         .sw_fini = gfx_v8_0_sw_fini,
6905         .hw_init = gfx_v8_0_hw_init,
6906         .hw_fini = gfx_v8_0_hw_fini,
6907         .suspend = gfx_v8_0_suspend,
6908         .resume = gfx_v8_0_resume,
6909         .is_idle = gfx_v8_0_is_idle,
6910         .wait_for_idle = gfx_v8_0_wait_for_idle,
6911         .check_soft_reset = gfx_v8_0_check_soft_reset,
6912         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6913         .soft_reset = gfx_v8_0_soft_reset,
6914         .post_soft_reset = gfx_v8_0_post_soft_reset,
6915         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6916         .set_powergating_state = gfx_v8_0_set_powergating_state,
6917         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6918 };
6919
6920 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6921         .type = AMDGPU_RING_TYPE_GFX,
6922         .align_mask = 0xff,
6923         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6924         .support_64bit_ptrs = false,
6925         .get_rptr = gfx_v8_0_ring_get_rptr,
6926         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6927         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6928         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6929                 5 +  /* COND_EXEC */
6930                 7 +  /* PIPELINE_SYNC */
6931                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6932                 12 +  /* FENCE for VM_FLUSH */
6933                 20 + /* GDS switch */
6934                 4 + /* double SWITCH_BUFFER,
6935                        the first COND_EXEC jump to the place just
6936                            prior to this double SWITCH_BUFFER  */
6937                 5 + /* COND_EXEC */
6938                 7 +      /*     HDP_flush */
6939                 4 +      /*     VGT_flush */
6940                 14 + /* CE_META */
6941                 31 + /* DE_META */
6942                 3 + /* CNTX_CTRL */
6943                 5 + /* HDP_INVL */
6944                 12 + 12 + /* FENCE x2 */
6945                 2 + /* SWITCH_BUFFER */
6946                 5, /* SURFACE_SYNC */
6947         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6948         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6949         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6950         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6951         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6952         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6953         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6954         .test_ring = gfx_v8_0_ring_test_ring,
6955         .test_ib = gfx_v8_0_ring_test_ib,
6956         .insert_nop = amdgpu_ring_insert_nop,
6957         .pad_ib = amdgpu_ring_generic_pad_ib,
6958         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6959         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6960         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6961         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6962         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6963         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6964         .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6965 };
6966
6967 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6968         .type = AMDGPU_RING_TYPE_COMPUTE,
6969         .align_mask = 0xff,
6970         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6971         .support_64bit_ptrs = false,
6972         .get_rptr = gfx_v8_0_ring_get_rptr,
6973         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6974         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6975         .emit_frame_size =
6976                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6977                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6978                 5 + /* hdp_invalidate */
6979                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6980                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6981                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6982                 7 + /* gfx_v8_0_emit_mem_sync_compute */
6983                 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6984                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6985         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6986         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6987         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6988         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6989         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6990         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6991         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6992         .test_ring = gfx_v8_0_ring_test_ring,
6993         .test_ib = gfx_v8_0_ring_test_ib,
6994         .insert_nop = amdgpu_ring_insert_nop,
6995         .pad_ib = amdgpu_ring_generic_pad_ib,
6996         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6997         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6998         .emit_wave_limit = gfx_v8_0_emit_wave_limit,
6999 };
7000
7001 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7002         .type = AMDGPU_RING_TYPE_KIQ,
7003         .align_mask = 0xff,
7004         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7005         .support_64bit_ptrs = false,
7006         .get_rptr = gfx_v8_0_ring_get_rptr,
7007         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7008         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7009         .emit_frame_size =
7010                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7011                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7012                 5 + /* hdp_invalidate */
7013                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7014                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7015                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7016         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7017         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7018         .test_ring = gfx_v8_0_ring_test_ring,
7019         .insert_nop = amdgpu_ring_insert_nop,
7020         .pad_ib = amdgpu_ring_generic_pad_ib,
7021         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7022         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7023 };
7024
7025 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7026 {
7027         int i;
7028
7029         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7030
7031         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7032                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7033
7034         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7035                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7036 }
7037
7038 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7039         .set = gfx_v8_0_set_eop_interrupt_state,
7040         .process = gfx_v8_0_eop_irq,
7041 };
7042
7043 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7044         .set = gfx_v8_0_set_priv_reg_fault_state,
7045         .process = gfx_v8_0_priv_reg_irq,
7046 };
7047
7048 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7049         .set = gfx_v8_0_set_priv_inst_fault_state,
7050         .process = gfx_v8_0_priv_inst_irq,
7051 };
7052
7053 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7054         .set = gfx_v8_0_set_cp_ecc_int_state,
7055         .process = gfx_v8_0_cp_ecc_error_irq,
7056 };
7057
7058 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7059         .set = gfx_v8_0_set_sq_int_state,
7060         .process = gfx_v8_0_sq_irq,
7061 };
7062
7063 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7064 {
7065         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7066         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7067
7068         adev->gfx.priv_reg_irq.num_types = 1;
7069         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7070
7071         adev->gfx.priv_inst_irq.num_types = 1;
7072         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7073
7074         adev->gfx.cp_ecc_error_irq.num_types = 1;
7075         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7076
7077         adev->gfx.sq_irq.num_types = 1;
7078         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7079 }
7080
7081 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7082 {
7083         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7084 }
7085
7086 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7087 {
7088         /* init asci gds info */
7089         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7090         adev->gds.gws_size = 64;
7091         adev->gds.oa_size = 16;
7092         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7093 }
7094
7095 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7096                                                  u32 bitmap)
7097 {
7098         u32 data;
7099
7100         if (!bitmap)
7101                 return;
7102
7103         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7104         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7105
7106         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7107 }
7108
7109 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7110 {
7111         u32 data, mask;
7112
7113         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7114                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7115
7116         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7117
7118         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7119 }
7120
7121 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7122 {
7123         int i, j, k, counter, active_cu_number = 0;
7124         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7125         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7126         unsigned disable_masks[4 * 2];
7127         u32 ao_cu_num;
7128
7129         memset(cu_info, 0, sizeof(*cu_info));
7130
7131         if (adev->flags & AMD_IS_APU)
7132                 ao_cu_num = 2;
7133         else
7134                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7135
7136         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7137
7138         mutex_lock(&adev->grbm_idx_mutex);
7139         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7140                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7141                         mask = 1;
7142                         ao_bitmap = 0;
7143                         counter = 0;
7144                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7145                         if (i < 4 && j < 2)
7146                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7147                                         adev, disable_masks[i * 2 + j]);
7148                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7149                         cu_info->bitmap[i][j] = bitmap;
7150
7151                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7152                                 if (bitmap & mask) {
7153                                         if (counter < ao_cu_num)
7154                                                 ao_bitmap |= mask;
7155                                         counter ++;
7156                                 }
7157                                 mask <<= 1;
7158                         }
7159                         active_cu_number += counter;
7160                         if (i < 2 && j < 2)
7161                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7162                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7163                 }
7164         }
7165         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7166         mutex_unlock(&adev->grbm_idx_mutex);
7167
7168         cu_info->number = active_cu_number;
7169         cu_info->ao_cu_mask = ao_cu_mask;
7170         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7171         cu_info->max_waves_per_simd = 10;
7172         cu_info->max_scratch_slots_per_cu = 32;
7173         cu_info->wave_front_size = 64;
7174         cu_info->lds_size = 64;
7175 }
7176
7177 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7178 {
7179         .type = AMD_IP_BLOCK_TYPE_GFX,
7180         .major = 8,
7181         .minor = 0,
7182         .rev = 0,
7183         .funcs = &gfx_v8_0_ip_funcs,
7184 };
7185
7186 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7187 {
7188         .type = AMD_IP_BLOCK_TYPE_GFX,
7189         .major = 8,
7190         .minor = 1,
7191         .rev = 0,
7192         .funcs = &gfx_v8_0_ip_funcs,
7193 };
7194
7195 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7196 {
7197         uint64_t ce_payload_addr;
7198         int cnt_ce;
7199         union {
7200                 struct vi_ce_ib_state regular;
7201                 struct vi_ce_ib_state_chained_ib chained;
7202         } ce_payload = {};
7203
7204         if (ring->adev->virt.chained_ib_support) {
7205                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7206                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7207                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7208         } else {
7209                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7210                         offsetof(struct vi_gfx_meta_data, ce_payload);
7211                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7212         }
7213
7214         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7215         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7216                                 WRITE_DATA_DST_SEL(8) |
7217                                 WR_CONFIRM) |
7218                                 WRITE_DATA_CACHE_POLICY(0));
7219         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7220         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7221         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7222 }
7223
7224 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7225 {
7226         uint64_t de_payload_addr, gds_addr, csa_addr;
7227         int cnt_de;
7228         union {
7229                 struct vi_de_ib_state regular;
7230                 struct vi_de_ib_state_chained_ib chained;
7231         } de_payload = {};
7232
7233         csa_addr = amdgpu_csa_vaddr(ring->adev);
7234         gds_addr = csa_addr + 4096;
7235         if (ring->adev->virt.chained_ib_support) {
7236                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7237                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7238                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7239                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7240         } else {
7241                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7242                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7243                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7244                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7245         }
7246
7247         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7248         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7249                                 WRITE_DATA_DST_SEL(8) |
7250                                 WR_CONFIRM) |
7251                                 WRITE_DATA_CACHE_POLICY(0));
7252         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7253         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7254         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7255 }
This page took 0.467293 seconds and 4 git commands to generate.