Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amd: Load MES microcode during early_init
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "amdgpu_ring.h"
33 #include "vi.h"
34 #include "vi_structs.h"
35 #include "vid.h"
36 #include "amdgpu_ucode.h"
37 #include "amdgpu_atombios.h"
38 #include "atombios_i2c.h"
39 #include "clearstate_vi.h"
40
41 #include "gmc/gmc_8_2_d.h"
42 #include "gmc/gmc_8_2_sh_mask.h"
43
44 #include "oss/oss_3_0_d.h"
45 #include "oss/oss_3_0_sh_mask.h"
46
47 #include "bif/bif_5_0_d.h"
48 #include "bif/bif_5_0_sh_mask.h"
49 #include "gca/gfx_8_0_d.h"
50 #include "gca/gfx_8_0_enum.h"
51 #include "gca/gfx_8_0_sh_mask.h"
52
53 #include "dce/dce_10_0_d.h"
54 #include "dce/dce_10_0_sh_mask.h"
55
56 #include "smu/smu_7_1_3_d.h"
57
58 #include "ivsrcid/ivsrcid_vislands30.h"
59
/*
 * GFX8 sizing constants: a ring count of 1 and an MEC HPD size of 4096.
 * NOTE(review): the unit of GFX8_MEC_HPD_SIZE is presumably bytes - confirm
 * at the site that uses it (not visible in this part of the file).
 */
60 #define GFX8_NUM_GFX_RINGS     1
61 #define GFX8_MEC_HPD_SIZE 4096
62
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values.  The same literals are repeated
 * for the mmGB_ADDR_CONFIG entry of the *_golden_common_all tables in this
 * file (0x22010001 for iceland/cz, 0x22011002 for polaris11, 0x22011003 for
 * tonga), so keep the two in sync when changing either.
 */
63 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67
/*
 * Field-placement helpers: each macro shifts its argument left by the
 * corresponding GB_TILE_MODE0__* or GB_MACROTILE_MODE0__* __SHIFT constant
 * so the results can be OR-ed together into a register word.
 * The macros only shift; they do not mask, so the caller must pass a value
 * that already fits the target field.
 */
68 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77
/*
 * Single-bit masks named after RLC_CGTT_MGCG_OVERRIDE, occupying bits 0
 * through 5 in the order CPF, RLC, MGCG, CGCG, CGLS, GRBM.
 */
78 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
79 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
80 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
82 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
83 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
84
85 /*
 * BPM SERDES command codes: 1 for "set", 0 for the "CLE" command
 * (presumably an abbreviation of "clear" - confirm at the use site).
 */
86 #define SET_BPM_SERDES_CMD    1
87 #define CLE_BPM_SERDES_CMD    0
88
89 /*
 * BPM register addresses.  The enumerators are consecutive starting at 0,
 * so BPM_REG_FGCG_MAX evaluates to 5, one past the last real address.
 */
90 enum {
91         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
92         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
93         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
94         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
95         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
96         BPM_REG_FGCG_MAX            /* End marker, not a register address */
97 };
98
/*
 * NOTE(review): judging by the name, the length (14) of the RLC
 * "format direct" register list; the unit (entries vs. dwords) is not
 * visible in this part of the file - confirm at the use site.
 */
99 #define RLC_FormatDirectRegListLength        14
100
/*
 * Firmware images named by this file, grouped per ASIC (carrizo, stoney,
 * tonga, topaz, fiji, polaris10/11/12, vegam).  Every group lists ce, pfp,
 * me, mec and rlc images.  Differences between groups, as written below:
 *   - stoney and topaz have no mec2 image;
 *   - polaris10/11/12 additionally list a "_2" variant of ce, pfp, me, mec
 *     and mec2 (but not of rlc).
 * NOTE(review): the code that selects between the base and "_2" images is
 * not visible in this part of the file.
 */
101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145
146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157
158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169
170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176
/*
 * GDS register table with one row per VMID, 0 through 15 (16 rows), so the
 * array index equals the VMID.  Each row lists, in this order, that VMID's
 * GDS BASE, GDS SIZE, GWS and OA register.
 * NOTE(review): the member names of struct amdgpu_gds_reg_offset are
 * declared elsewhere; the positional initializers rely on that declaration
 * keeping this field order.
 */
177 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 {
179         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 };
196
/*
 * Tonga "a11" golden settings.  The array is a flat run of u32 triples, one
 * triple per source line: a register name followed by two 32-bit words.
 * NOTE(review): the two words are presumably an AND-mask and the value to
 * program - confirm against the helper that consumes this table (not
 * visible in this part of the file).  Several entries carry value bits
 * outside the mask (e.g. mmTCC_CTRL 0x00100000 / 0xf31fff7f); whether those
 * bits are applied depends on that helper.
 */
197 static const u32 golden_settings_tonga_a11[] =
198 {
199         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202         mmGB_GPU_ID, 0x0000000f, 0x00000000,
203         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 };
216
/*
 * Tonga common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  Every entry uses 0xffffffff as its second
 * word.  The mmGB_ADDR_CONFIG value 0x22011003 equals
 * TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
217 static const u32 tonga_golden_common_all[] =
218 {
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 };
228
/*
 * Tonga MGCG/CGCG init table, as u32 triples (register name plus two 32-bit
 * words, presumably mask and value - confirm against the consumer, which is
 * not visible here).
 *
 * Order of entries as written:
 *   1. mmRLC_CGTT_MGCG_OVERRIDE set to 0xffffffff;
 *   2. mmGRBM_GFX_INDEX, then the CGTT clock-control registers
 *      (mostly 0x00000100);
 *   3. mmGRBM_GFX_INDEX again, then five CGTS registers for each of CU0
 *      through CU7 (CU0 and CU4 use a TA_SQC register where the other CUs
 *      use TA);
 *   4. mmCGTS_SM_CTRL_REG, mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and
 *      mmCP_MEM_SLP_CNTL.
 */
229 static const u32 tonga_mgcg_cgcg_init[] =
230 {
231         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 };
307
/*
 * VegaM "a11" golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  This table carries the
 * mmPA_SC_RASTER_CONFIG / mmPA_SC_RASTER_CONFIG_1 entries for VegaM;
 * vegam_golden_common_all has none.
 */
308 static const u32 golden_settings_vegam_a11[] =
309 {
310         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320         mmSQ_CONFIG, 0x07f80000, 0x01180000,
321         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 };
328
/*
 * VegaM common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  Unlike the tonga/polaris10/fiji common
 * tables, this one has no PA_SC_RASTER_CONFIG entries.
 */
329 static const u32 vegam_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 };
338
/*
 * Polaris11 "a11" golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  This table carries the
 * mmPA_SC_RASTER_CONFIG / mmPA_SC_RASTER_CONFIG_1 entries for Polaris11;
 * polaris11_golden_common_all has none.
 */
339 static const u32 golden_settings_polaris11_a11[] =
340 {
341         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351         mmSQ_CONFIG, 0x07f80000, 0x01180000,
352         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
/*
 * Polaris11 common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  The mmGB_ADDR_CONFIG value 0x22011002 equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 */
360 static const u32 polaris11_golden_common_all[] =
361 {
362         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 };
369
/*
 * Polaris10 "a11" golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  Compared with the polaris11 and vegam a11
 * tables, this one adds an mmATC_MISC_CG entry and has no
 * mmTCP_CHAN_STEER_LO entry.
 */
370 static const u32 golden_settings_polaris10_a11[] =
371 {
372         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383         mmSQ_CONFIG, 0x07f80000, 0x07180000,
384         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 };
390
/*
 * Polaris10 common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  The register list and values match
 * tonga_golden_common_all entry for entry.
 */
391 static const u32 polaris10_golden_common_all[] =
392 {
393         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 };
402
/*
 * Fiji common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  After the usual eight entries the table
 * writes mmGRBM_GFX_INDEX a second time and then adds an
 * mmSPI_CONFIG_CNTL_1 entry (0x0000000f / 0x00000009) that the other
 * *_golden_common_all tables in this file do not have.
 */
403 static const u32 fiji_golden_common_all[] =
404 {
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 };
416
/*
 * Fiji "a10" golden settings, as u32 triples (register name plus two 32-bit
 * words, presumably mask and value - confirm against the consumer, which is
 * not visible here).  Note the suffix is a10, not a11 as on the other
 * golden_settings_* tables in this file.
 */
417 static const u32 golden_settings_fiji_a10[] =
418 {
419         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 };
431
/*
 * Fiji MGCG/CGCG init table, as u32 triples (register name plus two 32-bit
 * words, presumably mask and value - confirm against the consumer, which is
 * not visible here).
 *
 * Same layout as the tonga table (override register, mmGRBM_GFX_INDEX,
 * CGTT clock-control registers, mmGRBM_GFX_INDEX again, closing
 * SM/CP/RLC/CP_MEM_SLP entries), except that this table has no per-CU
 * CGTS_CUn_* entries at all.
 */
432 static const u32 fiji_mgcg_cgcg_init[] =
433 {
434         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 };
470
/*
 * Iceland "a11" golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  This is the only golden_settings_* table in
 * this part of the file with an mmDB_DEBUG3 entry.
 */
471 static const u32 golden_settings_iceland_a11[] =
472 {
473         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476         mmGB_GPU_ID, 0x0000000f, 0x00000000,
477         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 };
490
/*
 * Iceland common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  The mmGB_ADDR_CONFIG value 0x22010001 equals
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
491 static const u32 iceland_golden_common_all[] =
492 {
493         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 };
502
/*
 * Iceland MGCG/CGCG init table, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).
 *
 * Differences from the tonga table, as written:
 *   - mmCGTT_CP/CPC/CPF_CLK_CTRL use 0xc0000100 and mmCGTT_TCI_CLK_CTRL
 *     uses 0xff000100;
 *   - per-CU CGTS entries cover CU0 through CU5 only;
 *   - the CU0 and CU4 TA_SQC entries use 0x0f840f87;
 *   - there is no trailing mmCP_MEM_SLP_CNTL entry.
 */
503 static const u32 iceland_mgcg_cgcg_init[] =
504 {
505         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 };
570
/*
 * "cz" a11 golden settings, as u32 triples (register name plus two 32-bit
 * words, presumably mask and value - confirm against the consumer, which is
 * not visible here).
 * NOTE(review): the mmTCP_ADDR_CONFIG entry pairs 0x0000000f with
 * 0x000000f3, i.e. the third word has bits outside the second; the other
 * a11 tables in this file use 0x000003ff there.  Whether the extra bits are
 * applied depends on the consumer - confirm before changing.
 */
571 static const u32 cz_golden_settings_a11[] =
572 {
573         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575         mmGB_GPU_ID, 0x0000000f, 0x00000000,
576         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 };
586
/*
 * "cz" common golden settings, as u32 triples (register name plus two
 * 32-bit words, presumably mask and value - confirm against the consumer,
 * which is not visible here).  The register list and values match
 * iceland_golden_common_all entry for entry; the mmGB_ADDR_CONFIG value
 * 0x22010001 equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
587 static const u32 cz_golden_common_all[] =
588 {
589         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 };
598
599 static const u32 cz_mgcg_cgcg_init[] =
600 {
601         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 };
677
678 static const u32 stoney_golden_settings_a11[] =
679 {
680         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681         mmGB_GPU_ID, 0x0000000f, 0x00000000,
682         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 };
691
692 static const u32 stoney_golden_common_all[] =
693 {
694         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 };
703
704 static const u32 stoney_mgcg_cgcg_init[] =
705 {
706         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 };
712
713
/*
 * Human-readable descriptions of the SQ EDC error sources.
 *
 * NOTE(review): presumably indexed by the source field decoded from the
 * SQ EDC info register; verify against the code that uses this table.
 */
static const char * const sq_edc_source_names[] = {
	[0] = "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	[1] = "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	[2] = "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	[3] = "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	[4] = "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	[5] = "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	[6] = "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
723
724 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732
733 #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
734 #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
735
736 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 {
738         uint32_t data;
739
740         switch (adev->asic_type) {
741         case CHIP_TOPAZ:
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_mgcg_cgcg_init,
744                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
745                 amdgpu_device_program_register_sequence(adev,
746                                                         golden_settings_iceland_a11,
747                                                         ARRAY_SIZE(golden_settings_iceland_a11));
748                 amdgpu_device_program_register_sequence(adev,
749                                                         iceland_golden_common_all,
750                                                         ARRAY_SIZE(iceland_golden_common_all));
751                 break;
752         case CHIP_FIJI:
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_mgcg_cgcg_init,
755                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
756                 amdgpu_device_program_register_sequence(adev,
757                                                         golden_settings_fiji_a10,
758                                                         ARRAY_SIZE(golden_settings_fiji_a10));
759                 amdgpu_device_program_register_sequence(adev,
760                                                         fiji_golden_common_all,
761                                                         ARRAY_SIZE(fiji_golden_common_all));
762                 break;
763
764         case CHIP_TONGA:
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_mgcg_cgcg_init,
767                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
768                 amdgpu_device_program_register_sequence(adev,
769                                                         golden_settings_tonga_a11,
770                                                         ARRAY_SIZE(golden_settings_tonga_a11));
771                 amdgpu_device_program_register_sequence(adev,
772                                                         tonga_golden_common_all,
773                                                         ARRAY_SIZE(tonga_golden_common_all));
774                 break;
775         case CHIP_VEGAM:
776                 amdgpu_device_program_register_sequence(adev,
777                                                         golden_settings_vegam_a11,
778                                                         ARRAY_SIZE(golden_settings_vegam_a11));
779                 amdgpu_device_program_register_sequence(adev,
780                                                         vegam_golden_common_all,
781                                                         ARRAY_SIZE(vegam_golden_common_all));
782                 break;
783         case CHIP_POLARIS11:
784         case CHIP_POLARIS12:
785                 amdgpu_device_program_register_sequence(adev,
786                                                         golden_settings_polaris11_a11,
787                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
788                 amdgpu_device_program_register_sequence(adev,
789                                                         polaris11_golden_common_all,
790                                                         ARRAY_SIZE(polaris11_golden_common_all));
791                 break;
792         case CHIP_POLARIS10:
793                 amdgpu_device_program_register_sequence(adev,
794                                                         golden_settings_polaris10_a11,
795                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
796                 amdgpu_device_program_register_sequence(adev,
797                                                         polaris10_golden_common_all,
798                                                         ARRAY_SIZE(polaris10_golden_common_all));
799                 data = RREG32_SMC(ixCG_ACLK_CNTL);
800                 data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801                 data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802                 WREG32_SMC(ixCG_ACLK_CNTL, data);
803                 if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809                 }
810                 break;
811         case CHIP_CARRIZO:
812                 amdgpu_device_program_register_sequence(adev,
813                                                         cz_mgcg_cgcg_init,
814                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
815                 amdgpu_device_program_register_sequence(adev,
816                                                         cz_golden_settings_a11,
817                                                         ARRAY_SIZE(cz_golden_settings_a11));
818                 amdgpu_device_program_register_sequence(adev,
819                                                         cz_golden_common_all,
820                                                         ARRAY_SIZE(cz_golden_common_all));
821                 break;
822         case CHIP_STONEY:
823                 amdgpu_device_program_register_sequence(adev,
824                                                         stoney_mgcg_cgcg_init,
825                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
826                 amdgpu_device_program_register_sequence(adev,
827                                                         stoney_golden_settings_a11,
828                                                         ARRAY_SIZE(stoney_golden_settings_a11));
829                 amdgpu_device_program_register_sequence(adev,
830                                                         stoney_golden_common_all,
831                                                         ARRAY_SIZE(stoney_golden_common_all));
832                 break;
833         default:
834                 break;
835         }
836 }
837
838 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 {
840         struct amdgpu_device *adev = ring->adev;
841         uint32_t tmp = 0;
842         unsigned i;
843         int r;
844
845         WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
846         r = amdgpu_ring_alloc(ring, 3);
847         if (r)
848                 return r;
849
850         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851         amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
852         amdgpu_ring_write(ring, 0xDEADBEEF);
853         amdgpu_ring_commit(ring);
854
855         for (i = 0; i < adev->usec_timeout; i++) {
856                 tmp = RREG32(mmSCRATCH_REG0);
857                 if (tmp == 0xDEADBEEF)
858                         break;
859                 udelay(1);
860         }
861
862         if (i >= adev->usec_timeout)
863                 r = -ETIMEDOUT;
864
865         return r;
866 }
867
868 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
869 {
870         struct amdgpu_device *adev = ring->adev;
871         struct amdgpu_ib ib;
872         struct dma_fence *f = NULL;
873
874         unsigned int index;
875         uint64_t gpu_addr;
876         uint32_t tmp;
877         long r;
878
879         r = amdgpu_device_wb_get(adev, &index);
880         if (r)
881                 return r;
882
883         gpu_addr = adev->wb.gpu_addr + (index * 4);
884         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
885         memset(&ib, 0, sizeof(ib));
886         r = amdgpu_ib_get(adev, NULL, 16,
887                                         AMDGPU_IB_POOL_DIRECT, &ib);
888         if (r)
889                 goto err1;
890
891         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893         ib.ptr[2] = lower_32_bits(gpu_addr);
894         ib.ptr[3] = upper_32_bits(gpu_addr);
895         ib.ptr[4] = 0xDEADBEEF;
896         ib.length_dw = 5;
897
898         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899         if (r)
900                 goto err2;
901
902         r = dma_fence_wait_timeout(f, false, timeout);
903         if (r == 0) {
904                 r = -ETIMEDOUT;
905                 goto err2;
906         } else if (r < 0) {
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF)
912                 r = 0;
913         else
914                 r = -EINVAL;
915
916 err2:
917         amdgpu_ib_free(adev, &ib, NULL);
918         dma_fence_put(f);
919 err1:
920         amdgpu_device_wb_free(adev, index);
921         return r;
922 }
923
924
925 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 {
927         release_firmware(adev->gfx.pfp_fw);
928         adev->gfx.pfp_fw = NULL;
929         release_firmware(adev->gfx.me_fw);
930         adev->gfx.me_fw = NULL;
931         release_firmware(adev->gfx.ce_fw);
932         adev->gfx.ce_fw = NULL;
933         release_firmware(adev->gfx.rlc_fw);
934         adev->gfx.rlc_fw = NULL;
935         release_firmware(adev->gfx.mec_fw);
936         adev->gfx.mec_fw = NULL;
937         if ((adev->asic_type != CHIP_STONEY) &&
938             (adev->asic_type != CHIP_TOPAZ))
939                 release_firmware(adev->gfx.mec2_fw);
940         adev->gfx.mec2_fw = NULL;
941
942         kfree(adev->gfx.rlc.register_list_format);
943 }
944
945 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
946 {
947         const char *chip_name;
948         char fw_name[30];
949         int err;
950         struct amdgpu_firmware_info *info = NULL;
951         const struct common_firmware_header *header = NULL;
952         const struct gfx_firmware_header_v1_0 *cp_hdr;
953         const struct rlc_firmware_header_v2_0 *rlc_hdr;
954         unsigned int *tmp = NULL, i;
955
956         DRM_DEBUG("\n");
957
958         switch (adev->asic_type) {
959         case CHIP_TOPAZ:
960                 chip_name = "topaz";
961                 break;
962         case CHIP_TONGA:
963                 chip_name = "tonga";
964                 break;
965         case CHIP_CARRIZO:
966                 chip_name = "carrizo";
967                 break;
968         case CHIP_FIJI:
969                 chip_name = "fiji";
970                 break;
971         case CHIP_STONEY:
972                 chip_name = "stoney";
973                 break;
974         case CHIP_POLARIS10:
975                 chip_name = "polaris10";
976                 break;
977         case CHIP_POLARIS11:
978                 chip_name = "polaris11";
979                 break;
980         case CHIP_POLARIS12:
981                 chip_name = "polaris12";
982                 break;
983         case CHIP_VEGAM:
984                 chip_name = "vegam";
985                 break;
986         default:
987                 BUG();
988         }
989
990         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
991                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
992                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
993                 if (err == -ENOENT) {
994                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 }
997         } else {
998                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
999                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000         }
1001         if (err)
1002                 goto out;
1003         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004         if (err)
1005                 goto out;
1006         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013                 if (err == -ENOENT) {
1014                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 }
1017         } else {
1018                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020         }
1021         if (err)
1022                 goto out;
1023         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024         if (err)
1025                 goto out;
1026         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034                 if (err == -ENOENT) {
1035                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 }
1038         } else {
1039                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041         }
1042         if (err)
1043                 goto out;
1044         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045         if (err)
1046                 goto out;
1047         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051         /*
1052          * Support for MCBP/Virtualization in combination with chained IBs is
1053          * formal released on feature version #46
1054          */
1055         if (adev->gfx.ce_feature_version >= 46 &&
1056             adev->gfx.pfp_feature_version >= 46) {
1057                 adev->virt.chained_ib_support = true;
1058                 DRM_INFO("Chained IB support enabled!\n");
1059         } else
1060                 adev->virt.chained_ib_support = false;
1061
1062         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064         if (err)
1065                 goto out;
1066         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071         adev->gfx.rlc.save_and_restore_offset =
1072                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073         adev->gfx.rlc.clear_state_descriptor_offset =
1074                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075         adev->gfx.rlc.avail_scratch_ram_locations =
1076                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077         adev->gfx.rlc.reg_restore_list_size =
1078                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079         adev->gfx.rlc.reg_list_format_start =
1080                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1081         adev->gfx.rlc.reg_list_format_separate_start =
1082                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083         adev->gfx.rlc.starting_offsets_start =
1084                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1085         adev->gfx.rlc.reg_list_format_size_bytes =
1086                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087         adev->gfx.rlc.reg_list_size_bytes =
1088                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090         adev->gfx.rlc.register_list_format =
1091                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094         if (!adev->gfx.rlc.register_list_format) {
1095                 err = -ENOMEM;
1096                 goto out;
1097         }
1098
1099         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1103
1104         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114                 if (err == -ENOENT) {
1115                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 }
1118         } else {
1119                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121         }
1122         if (err)
1123                 goto out;
1124         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125         if (err)
1126                 goto out;
1127         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131         if ((adev->asic_type != CHIP_STONEY) &&
1132             (adev->asic_type != CHIP_TOPAZ)) {
1133                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136                         if (err == -ENOENT) {
1137                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         }
1140                 } else {
1141                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143                 }
1144                 if (!err) {
1145                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146                         if (err)
1147                                 goto out;
1148                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149                                 adev->gfx.mec2_fw->data;
1150                         adev->gfx.mec2_fw_version =
1151                                 le32_to_cpu(cp_hdr->header.ucode_version);
1152                         adev->gfx.mec2_feature_version =
1153                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1154                 } else {
1155                         err = 0;
1156                         adev->gfx.mec2_fw = NULL;
1157                 }
1158         }
1159
1160         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162         info->fw = adev->gfx.pfp_fw;
1163         header = (const struct common_firmware_header *)info->fw->data;
1164         adev->firmware.fw_size +=
1165                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169         info->fw = adev->gfx.me_fw;
1170         header = (const struct common_firmware_header *)info->fw->data;
1171         adev->firmware.fw_size +=
1172                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176         info->fw = adev->gfx.ce_fw;
1177         header = (const struct common_firmware_header *)info->fw->data;
1178         adev->firmware.fw_size +=
1179                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183         info->fw = adev->gfx.rlc_fw;
1184         header = (const struct common_firmware_header *)info->fw->data;
1185         adev->firmware.fw_size +=
1186                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190         info->fw = adev->gfx.mec_fw;
1191         header = (const struct common_firmware_header *)info->fw->data;
1192         adev->firmware.fw_size +=
1193                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195         /* we need account JT in */
1196         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197         adev->firmware.fw_size +=
1198                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1199
1200         if (amdgpu_sriov_vf(adev)) {
1201                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203                 info->fw = adev->gfx.mec_fw;
1204                 adev->firmware.fw_size +=
1205                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206         }
1207
1208         if (adev->gfx.mec2_fw) {
1209                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211                 info->fw = adev->gfx.mec2_fw;
1212                 header = (const struct common_firmware_header *)info->fw->data;
1213                 adev->firmware.fw_size +=
1214                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx8: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.pfp_fw);
1223                 adev->gfx.pfp_fw = NULL;
1224                 release_firmware(adev->gfx.me_fw);
1225                 adev->gfx.me_fw = NULL;
1226                 release_firmware(adev->gfx.ce_fw);
1227                 adev->gfx.ce_fw = NULL;
1228                 release_firmware(adev->gfx.rlc_fw);
1229                 adev->gfx.rlc_fw = NULL;
1230                 release_firmware(adev->gfx.mec_fw);
1231                 adev->gfx.mec_fw = NULL;
1232                 release_firmware(adev->gfx.mec2_fw);
1233                 adev->gfx.mec2_fw = NULL;
1234         }
1235         return err;
1236 }
1237
1238 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                                     volatile u32 *buffer)
1240 {
1241         u32 count = 0, i;
1242         const struct cs_section_def *sect = NULL;
1243         const struct cs_extent_def *ext = NULL;
1244
1245         if (adev->gfx.rlc.cs_data == NULL)
1246                 return;
1247         if (buffer == NULL)
1248                 return;
1249
1250         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254         buffer[count++] = cpu_to_le32(0x80000000);
1255         buffer[count++] = cpu_to_le32(0x80000000);
1256
1257         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1259                         if (sect->id == SECT_CONTEXT) {
1260                                 buffer[count++] =
1261                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1263                                                 PACKET3_SET_CONTEXT_REG_START);
1264                                 for (i = 0; i < ext->reg_count; i++)
1265                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1266                         } else {
1267                                 return;
1268                         }
1269                 }
1270         }
1271
1272         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274                         PACKET3_SET_CONTEXT_REG_START);
1275         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282         buffer[count++] = cpu_to_le32(0);
1283 }
1284
1285 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 {
1287         if (adev->asic_type == CHIP_CARRIZO)
1288                 return 5;
1289         else
1290                 return 4;
1291 }
1292
1293 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 {
1295         const struct cs_section_def *cs_data;
1296         int r;
1297
1298         adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300         cs_data = adev->gfx.rlc.cs_data;
1301
1302         if (cs_data) {
1303                 /* init clear state block */
1304                 r = amdgpu_gfx_rlc_init_csb(adev);
1305                 if (r)
1306                         return r;
1307         }
1308
1309         if ((adev->asic_type == CHIP_CARRIZO) ||
1310             (adev->asic_type == CHIP_STONEY)) {
1311                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312                 r = amdgpu_gfx_rlc_init_cpt(adev);
1313                 if (r)
1314                         return r;
1315         }
1316
1317         /* init spm vmid with 0xf */
1318         if (adev->gfx.rlc.funcs->update_spm_vmid)
1319                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320
1321         return 0;
1322 }
1323
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331         int r;
1332         u32 *hpd;
1333         size_t mec_hpd_size;
1334
1335         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337         /* take ownership of the relevant compute queues */
1338         amdgpu_gfx_compute_queue_acquire(adev);
1339
1340         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341         if (mec_hpd_size) {
1342                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                                               AMDGPU_GEM_DOMAIN_VRAM |
1344                                               AMDGPU_GEM_DOMAIN_GTT,
1345                                               &adev->gfx.mec.hpd_eop_obj,
1346                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1347                                               (void **)&hpd);
1348                 if (r) {
1349                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1350                         return r;
1351                 }
1352
1353                 memset(hpd, 0, mec_hpd_size);
1354
1355                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1356                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1357         }
1358
1359         return 0;
1360 }
1361
1362 static const u32 vgpr_init_compute_shader[] =
1363 {
1364         0x7e000209, 0x7e020208,
1365         0x7e040207, 0x7e060206,
1366         0x7e080205, 0x7e0a0204,
1367         0x7e0c0203, 0x7e0e0202,
1368         0x7e100201, 0x7e120200,
1369         0x7e140209, 0x7e160208,
1370         0x7e180207, 0x7e1a0206,
1371         0x7e1c0205, 0x7e1e0204,
1372         0x7e200203, 0x7e220202,
1373         0x7e240201, 0x7e260200,
1374         0x7e280209, 0x7e2a0208,
1375         0x7e2c0207, 0x7e2e0206,
1376         0x7e300205, 0x7e320204,
1377         0x7e340203, 0x7e360202,
1378         0x7e380201, 0x7e3a0200,
1379         0x7e3c0209, 0x7e3e0208,
1380         0x7e400207, 0x7e420206,
1381         0x7e440205, 0x7e460204,
1382         0x7e480203, 0x7e4a0202,
1383         0x7e4c0201, 0x7e4e0200,
1384         0x7e500209, 0x7e520208,
1385         0x7e540207, 0x7e560206,
1386         0x7e580205, 0x7e5a0204,
1387         0x7e5c0203, 0x7e5e0202,
1388         0x7e600201, 0x7e620200,
1389         0x7e640209, 0x7e660208,
1390         0x7e680207, 0x7e6a0206,
1391         0x7e6c0205, 0x7e6e0204,
1392         0x7e700203, 0x7e720202,
1393         0x7e740201, 0x7e760200,
1394         0x7e780209, 0x7e7a0208,
1395         0x7e7c0207, 0x7e7e0206,
1396         0xbf8a0000, 0xbf810000,
1397 };
1398
1399 static const u32 sgpr_init_compute_shader[] =
1400 {
1401         0xbe8a0100, 0xbe8c0102,
1402         0xbe8e0104, 0xbe900106,
1403         0xbe920108, 0xbe940100,
1404         0xbe960102, 0xbe980104,
1405         0xbe9a0106, 0xbe9c0108,
1406         0xbe9e0100, 0xbea00102,
1407         0xbea20104, 0xbea40106,
1408         0xbea60108, 0xbea80100,
1409         0xbeaa0102, 0xbeac0104,
1410         0xbeae0106, 0xbeb00108,
1411         0xbeb20100, 0xbeb40102,
1412         0xbeb60104, 0xbeb80106,
1413         0xbeba0108, 0xbebc0100,
1414         0xbebe0102, 0xbec00104,
1415         0xbec20106, 0xbec40108,
1416         0xbec60100, 0xbec80102,
1417         0xbee60004, 0xbee70005,
1418         0xbeea0006, 0xbeeb0007,
1419         0xbee80008, 0xbee90009,
1420         0xbefc0000, 0xbf8a0000,
1421         0xbf810000, 0x00000000,
1422 };
1423
1424 static const u32 vgpr_init_regs[] =
1425 {
1426         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1427         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1428         mmCOMPUTE_NUM_THREAD_X, 256*4,
1429         mmCOMPUTE_NUM_THREAD_Y, 1,
1430         mmCOMPUTE_NUM_THREAD_Z, 1,
1431         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1432         mmCOMPUTE_PGM_RSRC2, 20,
1433         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1434         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1435         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1436         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1437         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1438         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1439         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1440         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1441         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1442         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1443 };
1444
1445 static const u32 sgpr1_init_regs[] =
1446 {
1447         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1448         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1449         mmCOMPUTE_NUM_THREAD_X, 256*5,
1450         mmCOMPUTE_NUM_THREAD_Y, 1,
1451         mmCOMPUTE_NUM_THREAD_Z, 1,
1452         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1453         mmCOMPUTE_PGM_RSRC2, 20,
1454         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1455         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1456         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1457         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1458         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1459         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1460         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1461         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1462         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1463         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1464 };
1465
1466 static const u32 sgpr2_init_regs[] =
1467 {
1468         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1469         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1470         mmCOMPUTE_NUM_THREAD_X, 256*5,
1471         mmCOMPUTE_NUM_THREAD_Y, 1,
1472         mmCOMPUTE_NUM_THREAD_Z, 1,
1473         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1474         mmCOMPUTE_PGM_RSRC2, 20,
1475         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1476         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1477         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1478         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1479         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1480         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1481         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1482         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1483         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1484         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1485 };
1486
1487 static const u32 sec_ded_counter_registers[] =
1488 {
1489         mmCPC_EDC_ATC_CNT,
1490         mmCPC_EDC_SCRATCH_CNT,
1491         mmCPC_EDC_UCODE_CNT,
1492         mmCPF_EDC_ATC_CNT,
1493         mmCPF_EDC_ROQ_CNT,
1494         mmCPF_EDC_TAG_CNT,
1495         mmCPG_EDC_ATC_CNT,
1496         mmCPG_EDC_DMA_CNT,
1497         mmCPG_EDC_TAG_CNT,
1498         mmDC_EDC_CSINVOC_CNT,
1499         mmDC_EDC_RESTORE_CNT,
1500         mmDC_EDC_STATE_CNT,
1501         mmGDS_EDC_CNT,
1502         mmGDS_EDC_GRBM_CNT,
1503         mmGDS_EDC_OA_DED,
1504         mmSPI_EDC_CNT,
1505         mmSQC_ATC_EDC_GATCL1_CNT,
1506         mmSQC_EDC_CNT,
1507         mmSQ_EDC_DED_CNT,
1508         mmSQ_EDC_INFO,
1509         mmSQ_EDC_SEC_CNT,
1510         mmTCC_EDC_CNT,
1511         mmTCP_ATC_EDC_GATCL1_CNT,
1512         mmTCP_EDC_CNT,
1513         mmTD_EDC_CNT
1514 };
1515
1516 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1517 {
1518         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1519         struct amdgpu_ib ib;
1520         struct dma_fence *f = NULL;
1521         int r, i;
1522         u32 tmp;
1523         unsigned total_size, vgpr_offset, sgpr_offset;
1524         u64 gpu_addr;
1525
1526         /* only supported on CZ */
1527         if (adev->asic_type != CHIP_CARRIZO)
1528                 return 0;
1529
1530         /* bail if the compute ring is not ready */
1531         if (!ring->sched.ready)
1532                 return 0;
1533
1534         tmp = RREG32(mmGB_EDC_MODE);
1535         WREG32(mmGB_EDC_MODE, 0);
1536
1537         total_size =
1538                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1539         total_size +=
1540                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1541         total_size +=
1542                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1543         total_size = ALIGN(total_size, 256);
1544         vgpr_offset = total_size;
1545         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1546         sgpr_offset = total_size;
1547         total_size += sizeof(sgpr_init_compute_shader);
1548
1549         /* allocate an indirect buffer to put the commands in */
1550         memset(&ib, 0, sizeof(ib));
1551         r = amdgpu_ib_get(adev, NULL, total_size,
1552                                         AMDGPU_IB_POOL_DIRECT, &ib);
1553         if (r) {
1554                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1555                 return r;
1556         }
1557
1558         /* load the compute shaders */
1559         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1560                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1561
1562         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1563                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1564
1565         /* init the ib length to 0 */
1566         ib.length_dw = 0;
1567
1568         /* VGPR */
1569         /* write the register state for the compute dispatch */
1570         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1571                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1572                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1573                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1574         }
1575         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1576         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1577         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1578         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1579         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1580         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1581
1582         /* write dispatch packet */
1583         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1584         ib.ptr[ib.length_dw++] = 8; /* x */
1585         ib.ptr[ib.length_dw++] = 1; /* y */
1586         ib.ptr[ib.length_dw++] = 1; /* z */
1587         ib.ptr[ib.length_dw++] =
1588                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1589
1590         /* write CS partial flush packet */
1591         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1592         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1593
1594         /* SGPR1 */
1595         /* write the register state for the compute dispatch */
1596         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1597                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1598                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1599                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1600         }
1601         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1602         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1603         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1604         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1605         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1606         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1607
1608         /* write dispatch packet */
1609         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1610         ib.ptr[ib.length_dw++] = 8; /* x */
1611         ib.ptr[ib.length_dw++] = 1; /* y */
1612         ib.ptr[ib.length_dw++] = 1; /* z */
1613         ib.ptr[ib.length_dw++] =
1614                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1615
1616         /* write CS partial flush packet */
1617         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1618         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1619
1620         /* SGPR2 */
1621         /* write the register state for the compute dispatch */
1622         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1623                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1624                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1625                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1626         }
1627         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1628         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1629         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1630         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1631         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1632         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1633
1634         /* write dispatch packet */
1635         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1636         ib.ptr[ib.length_dw++] = 8; /* x */
1637         ib.ptr[ib.length_dw++] = 1; /* y */
1638         ib.ptr[ib.length_dw++] = 1; /* z */
1639         ib.ptr[ib.length_dw++] =
1640                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1641
1642         /* write CS partial flush packet */
1643         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1644         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1645
1646         /* shedule the ib on the ring */
1647         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1648         if (r) {
1649                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1650                 goto fail;
1651         }
1652
1653         /* wait for the GPU to finish processing the IB */
1654         r = dma_fence_wait(f, false);
1655         if (r) {
1656                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1657                 goto fail;
1658         }
1659
1660         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1661         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1662         WREG32(mmGB_EDC_MODE, tmp);
1663
1664         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1665         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1666         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1667
1668
1669         /* read back registers to clear the counters */
1670         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1671                 RREG32(sec_ded_counter_registers[i]);
1672
1673 fail:
1674         amdgpu_ib_free(adev, &ib, NULL);
1675         dma_fence_put(f);
1676
1677         return r;
1678 }
1679
1680 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1681 {
1682         u32 gb_addr_config;
1683         u32 mc_arb_ramcfg;
1684         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1685         u32 tmp;
1686         int ret;
1687
1688         switch (adev->asic_type) {
1689         case CHIP_TOPAZ:
1690                 adev->gfx.config.max_shader_engines = 1;
1691                 adev->gfx.config.max_tile_pipes = 2;
1692                 adev->gfx.config.max_cu_per_sh = 6;
1693                 adev->gfx.config.max_sh_per_se = 1;
1694                 adev->gfx.config.max_backends_per_se = 2;
1695                 adev->gfx.config.max_texture_channel_caches = 2;
1696                 adev->gfx.config.max_gprs = 256;
1697                 adev->gfx.config.max_gs_threads = 32;
1698                 adev->gfx.config.max_hw_contexts = 8;
1699
1700                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1701                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1702                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1703                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1704                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1705                 break;
1706         case CHIP_FIJI:
1707                 adev->gfx.config.max_shader_engines = 4;
1708                 adev->gfx.config.max_tile_pipes = 16;
1709                 adev->gfx.config.max_cu_per_sh = 16;
1710                 adev->gfx.config.max_sh_per_se = 1;
1711                 adev->gfx.config.max_backends_per_se = 4;
1712                 adev->gfx.config.max_texture_channel_caches = 16;
1713                 adev->gfx.config.max_gprs = 256;
1714                 adev->gfx.config.max_gs_threads = 32;
1715                 adev->gfx.config.max_hw_contexts = 8;
1716
1717                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1718                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1719                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1720                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1721                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1722                 break;
1723         case CHIP_POLARIS11:
1724         case CHIP_POLARIS12:
1725                 ret = amdgpu_atombios_get_gfx_info(adev);
1726                 if (ret)
1727                         return ret;
1728                 adev->gfx.config.max_gprs = 256;
1729                 adev->gfx.config.max_gs_threads = 32;
1730                 adev->gfx.config.max_hw_contexts = 8;
1731
1732                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1733                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1734                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1735                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1736                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1737                 break;
1738         case CHIP_POLARIS10:
1739         case CHIP_VEGAM:
1740                 ret = amdgpu_atombios_get_gfx_info(adev);
1741                 if (ret)
1742                         return ret;
1743                 adev->gfx.config.max_gprs = 256;
1744                 adev->gfx.config.max_gs_threads = 32;
1745                 adev->gfx.config.max_hw_contexts = 8;
1746
1747                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752                 break;
1753         case CHIP_TONGA:
1754                 adev->gfx.config.max_shader_engines = 4;
1755                 adev->gfx.config.max_tile_pipes = 8;
1756                 adev->gfx.config.max_cu_per_sh = 8;
1757                 adev->gfx.config.max_sh_per_se = 1;
1758                 adev->gfx.config.max_backends_per_se = 2;
1759                 adev->gfx.config.max_texture_channel_caches = 8;
1760                 adev->gfx.config.max_gprs = 256;
1761                 adev->gfx.config.max_gs_threads = 32;
1762                 adev->gfx.config.max_hw_contexts = 8;
1763
1764                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1765                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1766                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1767                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1768                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1769                 break;
1770         case CHIP_CARRIZO:
1771                 adev->gfx.config.max_shader_engines = 1;
1772                 adev->gfx.config.max_tile_pipes = 2;
1773                 adev->gfx.config.max_sh_per_se = 1;
1774                 adev->gfx.config.max_backends_per_se = 2;
1775                 adev->gfx.config.max_cu_per_sh = 8;
1776                 adev->gfx.config.max_texture_channel_caches = 2;
1777                 adev->gfx.config.max_gprs = 256;
1778                 adev->gfx.config.max_gs_threads = 32;
1779                 adev->gfx.config.max_hw_contexts = 8;
1780
1781                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1782                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1783                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1784                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1785                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1786                 break;
1787         case CHIP_STONEY:
1788                 adev->gfx.config.max_shader_engines = 1;
1789                 adev->gfx.config.max_tile_pipes = 2;
1790                 adev->gfx.config.max_sh_per_se = 1;
1791                 adev->gfx.config.max_backends_per_se = 1;
1792                 adev->gfx.config.max_cu_per_sh = 3;
1793                 adev->gfx.config.max_texture_channel_caches = 2;
1794                 adev->gfx.config.max_gprs = 256;
1795                 adev->gfx.config.max_gs_threads = 16;
1796                 adev->gfx.config.max_hw_contexts = 8;
1797
1798                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1799                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1800                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1801                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1802                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1803                 break;
1804         default:
1805                 adev->gfx.config.max_shader_engines = 2;
1806                 adev->gfx.config.max_tile_pipes = 4;
1807                 adev->gfx.config.max_cu_per_sh = 2;
1808                 adev->gfx.config.max_sh_per_se = 1;
1809                 adev->gfx.config.max_backends_per_se = 2;
1810                 adev->gfx.config.max_texture_channel_caches = 4;
1811                 adev->gfx.config.max_gprs = 256;
1812                 adev->gfx.config.max_gs_threads = 32;
1813                 adev->gfx.config.max_hw_contexts = 8;
1814
1815                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1820                 break;
1821         }
1822
1823         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1824         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1825
1826         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1827                                 MC_ARB_RAMCFG, NOOFBANK);
1828         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1829                                 MC_ARB_RAMCFG, NOOFRANKS);
1830
1831         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1832         adev->gfx.config.mem_max_burst_length_bytes = 256;
1833         if (adev->flags & AMD_IS_APU) {
1834                 /* Get memory bank mapping mode. */
1835                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1836                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1837                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1838
1839                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1840                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1841                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1842
1843                 /* Validate settings in case only one DIMM installed. */
1844                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1845                         dimm00_addr_map = 0;
1846                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1847                         dimm01_addr_map = 0;
1848                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1849                         dimm10_addr_map = 0;
1850                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1851                         dimm11_addr_map = 0;
1852
1853                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1854                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1855                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1856                         adev->gfx.config.mem_row_size_in_kb = 2;
1857                 else
1858                         adev->gfx.config.mem_row_size_in_kb = 1;
1859         } else {
1860                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1861                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1862                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1863                         adev->gfx.config.mem_row_size_in_kb = 4;
1864         }
1865
1866         adev->gfx.config.shader_engine_tile_size = 32;
1867         adev->gfx.config.num_gpus = 1;
1868         adev->gfx.config.multi_gpu_tile_size = 64;
1869
1870         /* fix up row size */
1871         switch (adev->gfx.config.mem_row_size_in_kb) {
1872         case 1:
1873         default:
1874                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1875                 break;
1876         case 2:
1877                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1878                 break;
1879         case 4:
1880                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1881                 break;
1882         }
1883         adev->gfx.config.gb_addr_config = gb_addr_config;
1884
1885         return 0;
1886 }
1887
1888 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1889                                         int mec, int pipe, int queue)
1890 {
1891         int r;
1892         unsigned irq_type;
1893         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1894         unsigned int hw_prio;
1895
1896         ring = &adev->gfx.compute_ring[ring_id];
1897
1898         /* mec0 is me1 */
1899         ring->me = mec + 1;
1900         ring->pipe = pipe;
1901         ring->queue = queue;
1902
1903         ring->ring_obj = NULL;
1904         ring->use_doorbell = true;
1905         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1906         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1907                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1908         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1909
1910         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1911                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1912                 + ring->pipe;
1913
1914         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1915                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1916         /* type-2 packets are deprecated on MEC, use type-3 instead */
1917         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1918                              hw_prio, NULL);
1919         if (r)
1920                 return r;
1921
1922
1923         return 0;
1924 }
1925
1926 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1927
1928 static int gfx_v8_0_sw_init(void *handle)
1929 {
1930         int i, j, k, r, ring_id;
1931         struct amdgpu_ring *ring;
1932         struct amdgpu_kiq *kiq;
1933         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1934
1935         switch (adev->asic_type) {
1936         case CHIP_TONGA:
1937         case CHIP_CARRIZO:
1938         case CHIP_FIJI:
1939         case CHIP_POLARIS10:
1940         case CHIP_POLARIS11:
1941         case CHIP_POLARIS12:
1942         case CHIP_VEGAM:
1943                 adev->gfx.mec.num_mec = 2;
1944                 break;
1945         case CHIP_TOPAZ:
1946         case CHIP_STONEY:
1947         default:
1948                 adev->gfx.mec.num_mec = 1;
1949                 break;
1950         }
1951
1952         adev->gfx.mec.num_pipe_per_mec = 4;
1953         adev->gfx.mec.num_queue_per_pipe = 8;
1954
1955         /* EOP Event */
1956         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1957         if (r)
1958                 return r;
1959
1960         /* Privileged reg */
1961         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1962                               &adev->gfx.priv_reg_irq);
1963         if (r)
1964                 return r;
1965
1966         /* Privileged inst */
1967         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1968                               &adev->gfx.priv_inst_irq);
1969         if (r)
1970                 return r;
1971
1972         /* Add CP EDC/ECC irq  */
1973         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1974                               &adev->gfx.cp_ecc_error_irq);
1975         if (r)
1976                 return r;
1977
1978         /* SQ interrupts. */
1979         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1980                               &adev->gfx.sq_irq);
1981         if (r) {
1982                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1983                 return r;
1984         }
1985
1986         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1987
1988         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1989
1990         r = gfx_v8_0_init_microcode(adev);
1991         if (r) {
1992                 DRM_ERROR("Failed to load gfx firmware!\n");
1993                 return r;
1994         }
1995
1996         r = adev->gfx.rlc.funcs->init(adev);
1997         if (r) {
1998                 DRM_ERROR("Failed to init rlc BOs!\n");
1999                 return r;
2000         }
2001
2002         r = gfx_v8_0_mec_init(adev);
2003         if (r) {
2004                 DRM_ERROR("Failed to init MEC BOs!\n");
2005                 return r;
2006         }
2007
2008         /* set up the gfx ring */
2009         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2010                 ring = &adev->gfx.gfx_ring[i];
2011                 ring->ring_obj = NULL;
2012                 sprintf(ring->name, "gfx");
2013                 /* no gfx doorbells on iceland */
2014                 if (adev->asic_type != CHIP_TOPAZ) {
2015                         ring->use_doorbell = true;
2016                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2017                 }
2018
2019                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2020                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2021                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2022                 if (r)
2023                         return r;
2024         }
2025
2026
2027         /* set up the compute queues - allocate horizontally across pipes */
2028         ring_id = 0;
2029         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2030                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2031                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2032                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2033                                         continue;
2034
2035                                 r = gfx_v8_0_compute_ring_init(adev,
2036                                                                 ring_id,
2037                                                                 i, k, j);
2038                                 if (r)
2039                                         return r;
2040
2041                                 ring_id++;
2042                         }
2043                 }
2044         }
2045
2046         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2047         if (r) {
2048                 DRM_ERROR("Failed to init KIQ BOs!\n");
2049                 return r;
2050         }
2051
2052         kiq = &adev->gfx.kiq;
2053         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2054         if (r)
2055                 return r;
2056
2057         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2058         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2059         if (r)
2060                 return r;
2061
2062         adev->gfx.ce_ram_size = 0x8000;
2063
2064         r = gfx_v8_0_gpu_early_init(adev);
2065         if (r)
2066                 return r;
2067
2068         return 0;
2069 }
2070
2071 static int gfx_v8_0_sw_fini(void *handle)
2072 {
2073         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2074         int i;
2075
2076         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2077                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2078         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2079                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2080
2081         amdgpu_gfx_mqd_sw_fini(adev);
2082         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2083         amdgpu_gfx_kiq_fini(adev);
2084
2085         gfx_v8_0_mec_fini(adev);
2086         amdgpu_gfx_rlc_fini(adev);
2087         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2088                                 &adev->gfx.rlc.clear_state_gpu_addr,
2089                                 (void **)&adev->gfx.rlc.cs_ptr);
2090         if ((adev->asic_type == CHIP_CARRIZO) ||
2091             (adev->asic_type == CHIP_STONEY)) {
2092                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2093                                 &adev->gfx.rlc.cp_table_gpu_addr,
2094                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2095         }
2096         gfx_v8_0_free_microcode(adev);
2097
2098         return 0;
2099 }
2100
2101 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2102 {
2103         uint32_t *modearray, *mod2array;
2104         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2105         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2106         u32 reg_offset;
2107
2108         modearray = adev->gfx.config.tile_mode_array;
2109         mod2array = adev->gfx.config.macrotile_mode_array;
2110
2111         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2112                 modearray[reg_offset] = 0;
2113
2114         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2115                 mod2array[reg_offset] = 0;
2116
2117         switch (adev->asic_type) {
2118         case CHIP_TOPAZ:
2119                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120                                 PIPE_CONFIG(ADDR_SURF_P2) |
2121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2123                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                 PIPE_CONFIG(ADDR_SURF_P2) |
2125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2127                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128                                 PIPE_CONFIG(ADDR_SURF_P2) |
2129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132                                 PIPE_CONFIG(ADDR_SURF_P2) |
2133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136                                 PIPE_CONFIG(ADDR_SURF_P2) |
2137                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2140                                 PIPE_CONFIG(ADDR_SURF_P2) |
2141                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2143                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2));
2149                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2150                                 PIPE_CONFIG(ADDR_SURF_P2) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2152                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154                                  PIPE_CONFIG(ADDR_SURF_P2) |
2155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2157                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158                                  PIPE_CONFIG(ADDR_SURF_P2) |
2159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2162                                  PIPE_CONFIG(ADDR_SURF_P2) |
2163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166                                  PIPE_CONFIG(ADDR_SURF_P2) |
2167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2170                                  PIPE_CONFIG(ADDR_SURF_P2) |
2171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174                                  PIPE_CONFIG(ADDR_SURF_P2) |
2175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2177                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2185                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2217                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2221
2222                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2225                                 NUM_BANKS(ADDR_SURF_8_BANK));
2226                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2229                                 NUM_BANKS(ADDR_SURF_8_BANK));
2230                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233                                 NUM_BANKS(ADDR_SURF_8_BANK));
2234                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2237                                 NUM_BANKS(ADDR_SURF_8_BANK));
2238                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241                                 NUM_BANKS(ADDR_SURF_8_BANK));
2242                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2245                                 NUM_BANKS(ADDR_SURF_8_BANK));
2246                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249                                 NUM_BANKS(ADDR_SURF_8_BANK));
2250                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253                                 NUM_BANKS(ADDR_SURF_16_BANK));
2254                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2257                                 NUM_BANKS(ADDR_SURF_16_BANK));
2258                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                  NUM_BANKS(ADDR_SURF_16_BANK));
2262                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2263                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265                                  NUM_BANKS(ADDR_SURF_16_BANK));
2266                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2268                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269                                  NUM_BANKS(ADDR_SURF_16_BANK));
2270                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273                                  NUM_BANKS(ADDR_SURF_16_BANK));
2274                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277                                  NUM_BANKS(ADDR_SURF_8_BANK));
2278
2279                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2280                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2281                             reg_offset != 23)
2282                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2283
2284                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2285                         if (reg_offset != 7)
2286                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2287
2288                 break;
2289         case CHIP_FIJI:
2290         case CHIP_VEGAM:
2291                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2307                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2312                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2325                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2337                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2338                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2341                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2373                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2377                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2382                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2409                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2413
2414                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417                                 NUM_BANKS(ADDR_SURF_8_BANK));
2418                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2420                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2421                                 NUM_BANKS(ADDR_SURF_8_BANK));
2422                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2424                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425                                 NUM_BANKS(ADDR_SURF_8_BANK));
2426                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2428                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2429                                 NUM_BANKS(ADDR_SURF_8_BANK));
2430                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2433                                 NUM_BANKS(ADDR_SURF_8_BANK));
2434                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437                                 NUM_BANKS(ADDR_SURF_8_BANK));
2438                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453                                  NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457                                  NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                  NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465                                  NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                  NUM_BANKS(ADDR_SURF_4_BANK));
2470
2471                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2472                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2473
2474                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2475                         if (reg_offset != 7)
2476                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2477
2478                 break;
2479         case CHIP_TONGA:
2480                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2483                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2487                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2488                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2491                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2492                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2495                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2496                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2499                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2501                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2503                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2504                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2514                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2522                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2523                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2526                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2527                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2529                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2530                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2538                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2539                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2541                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2562                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2566                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2571                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2598                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2602
2603                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642                                  NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                  NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650                                  NUM_BANKS(ADDR_SURF_8_BANK));
2651                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                  NUM_BANKS(ADDR_SURF_4_BANK));
2655                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                  NUM_BANKS(ADDR_SURF_4_BANK));
2659
2660                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2662
2663                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2664                         if (reg_offset != 7)
2665                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2666
2667                 break;
2668         case CHIP_POLARIS11:
2669         case CHIP_POLARIS12:
2670                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2673                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2674                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2678                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2682                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2690                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2694                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2695                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2704                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2707                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2715                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2716                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2717                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2720                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2723                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2727                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2728                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2729                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2733                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2735                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2736                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2744                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2748                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2752                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2756                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2760                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2764                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2787                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2788                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2791                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2792
2793                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2795                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2796                                 NUM_BANKS(ADDR_SURF_16_BANK));
2797
2798                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2801                                 NUM_BANKS(ADDR_SURF_16_BANK));
2802
2803                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2806                                 NUM_BANKS(ADDR_SURF_16_BANK));
2807
2808                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856                                 NUM_BANKS(ADDR_SURF_8_BANK));
2857
2858                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2861                                 NUM_BANKS(ADDR_SURF_4_BANK));
2862
2863                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2864                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2865
2866                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2867                         if (reg_offset != 7)
2868                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2869
2870                 break;
2871         case CHIP_POLARIS10:
2872                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2875                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2876                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2880                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2882                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2883                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2884                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2887                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2888                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2892                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2896                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2906                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2907                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2909                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2913                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2915                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2917                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2918                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2919                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2921                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2922                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2926                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2930                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2931                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2938                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2946                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2947                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2950                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2954                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2958                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2962                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2963                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2966                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2967                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2990                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2991                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2994
2995                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999
3000                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004
3005                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008                                 NUM_BANKS(ADDR_SURF_16_BANK));
3009
3010                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3053                                 NUM_BANKS(ADDR_SURF_8_BANK));
3054
3055                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058                                 NUM_BANKS(ADDR_SURF_4_BANK));
3059
3060                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063                                 NUM_BANKS(ADDR_SURF_4_BANK));
3064
3065                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3066                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3067
3068                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3069                         if (reg_offset != 7)
3070                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3071
3072                 break;
3073         case CHIP_STONEY:
3074                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3075                                 PIPE_CONFIG(ADDR_SURF_P2) |
3076                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3077                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3078                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3079                                 PIPE_CONFIG(ADDR_SURF_P2) |
3080                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3081                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3082                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083                                 PIPE_CONFIG(ADDR_SURF_P2) |
3084                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3086                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3087                                 PIPE_CONFIG(ADDR_SURF_P2) |
3088                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3090                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3091                                 PIPE_CONFIG(ADDR_SURF_P2) |
3092                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3094                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3095                                 PIPE_CONFIG(ADDR_SURF_P2) |
3096                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3098                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099                                 PIPE_CONFIG(ADDR_SURF_P2) |
3100                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2));
3104                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3105                                 PIPE_CONFIG(ADDR_SURF_P2) |
3106                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3107                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3108                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109                                  PIPE_CONFIG(ADDR_SURF_P2) |
3110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3112                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3113                                  PIPE_CONFIG(ADDR_SURF_P2) |
3114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3116                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3117                                  PIPE_CONFIG(ADDR_SURF_P2) |
3118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3120                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121                                  PIPE_CONFIG(ADDR_SURF_P2) |
3122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3124                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3125                                  PIPE_CONFIG(ADDR_SURF_P2) |
3126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3128                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3129                                  PIPE_CONFIG(ADDR_SURF_P2) |
3130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3132                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3133                                  PIPE_CONFIG(ADDR_SURF_P2) |
3134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3136                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3140                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3144                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3148                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3152                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3156                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3168                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3172                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3176
3177                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3179                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3180                                 NUM_BANKS(ADDR_SURF_8_BANK));
3181                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3182                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3183                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3184                                 NUM_BANKS(ADDR_SURF_8_BANK));
3185                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3188                                 NUM_BANKS(ADDR_SURF_8_BANK));
3189                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3192                                 NUM_BANKS(ADDR_SURF_8_BANK));
3193                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3194                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3195                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3196                                 NUM_BANKS(ADDR_SURF_8_BANK));
3197                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3198                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3199                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3200                                 NUM_BANKS(ADDR_SURF_8_BANK));
3201                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3204                                 NUM_BANKS(ADDR_SURF_8_BANK));
3205                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_16_BANK));
3209                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                 NUM_BANKS(ADDR_SURF_16_BANK));
3213                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3214                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3215                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3216                                  NUM_BANKS(ADDR_SURF_16_BANK));
3217                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3218                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3219                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3220                                  NUM_BANKS(ADDR_SURF_16_BANK));
3221                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3223                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3224                                  NUM_BANKS(ADDR_SURF_16_BANK));
3225                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3228                                  NUM_BANKS(ADDR_SURF_16_BANK));
3229                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232                                  NUM_BANKS(ADDR_SURF_8_BANK));
3233
3234                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3235                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3236                             reg_offset != 23)
3237                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3238
3239                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3240                         if (reg_offset != 7)
3241                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3242
3243                 break;
3244         default:
3245                 dev_warn(adev->dev,
3246                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3247                          adev->asic_type);
3248                 fallthrough;
3249
3250         case CHIP_CARRIZO:
3251                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3252                                 PIPE_CONFIG(ADDR_SURF_P2) |
3253                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3254                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3255                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256                                 PIPE_CONFIG(ADDR_SURF_P2) |
3257                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3258                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3259                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260                                 PIPE_CONFIG(ADDR_SURF_P2) |
3261                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3262                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3263                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264                                 PIPE_CONFIG(ADDR_SURF_P2) |
3265                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3266                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3267                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268                                 PIPE_CONFIG(ADDR_SURF_P2) |
3269                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3270                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3271                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3272                                 PIPE_CONFIG(ADDR_SURF_P2) |
3273                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3275                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3276                                 PIPE_CONFIG(ADDR_SURF_P2) |
3277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3280                                 PIPE_CONFIG(ADDR_SURF_P2));
3281                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3284                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3285                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286                                  PIPE_CONFIG(ADDR_SURF_P2) |
3287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3289                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290                                  PIPE_CONFIG(ADDR_SURF_P2) |
3291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3293                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3294                                  PIPE_CONFIG(ADDR_SURF_P2) |
3295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298                                  PIPE_CONFIG(ADDR_SURF_P2) |
3299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3301                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3302                                  PIPE_CONFIG(ADDR_SURF_P2) |
3303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3305                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3306                                  PIPE_CONFIG(ADDR_SURF_P2) |
3307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3309                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3310                                  PIPE_CONFIG(ADDR_SURF_P2) |
3311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3313                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3314                                  PIPE_CONFIG(ADDR_SURF_P2) |
3315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3317                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3321                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3325                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3329                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3333                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3349                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3353
3354                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3356                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3357                                 NUM_BANKS(ADDR_SURF_8_BANK));
3358                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3359                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3360                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361                                 NUM_BANKS(ADDR_SURF_8_BANK));
3362                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3363                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3364                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3365                                 NUM_BANKS(ADDR_SURF_8_BANK));
3366                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3367                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3368                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3369                                 NUM_BANKS(ADDR_SURF_8_BANK));
3370                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3372                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3373                                 NUM_BANKS(ADDR_SURF_8_BANK));
3374                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3375                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3376                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3377                                 NUM_BANKS(ADDR_SURF_8_BANK));
3378                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3381                                 NUM_BANKS(ADDR_SURF_8_BANK));
3382                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385                                 NUM_BANKS(ADDR_SURF_16_BANK));
3386                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389                                 NUM_BANKS(ADDR_SURF_16_BANK));
3390                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3391                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3392                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3393                                  NUM_BANKS(ADDR_SURF_16_BANK));
3394                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3395                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3396                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3397                                  NUM_BANKS(ADDR_SURF_16_BANK));
3398                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3400                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3401                                  NUM_BANKS(ADDR_SURF_16_BANK));
3402                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3405                                  NUM_BANKS(ADDR_SURF_16_BANK));
3406                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409                                  NUM_BANKS(ADDR_SURF_8_BANK));
3410
3411                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3412                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3413                             reg_offset != 23)
3414                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3415
3416                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3417                         if (reg_offset != 7)
3418                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3419
3420                 break;
3421         }
3422 }
3423
3424 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3425                                   u32 se_num, u32 sh_num, u32 instance)
3426 {
3427         u32 data;
3428
3429         if (instance == 0xffffffff)
3430                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3431         else
3432                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3433
3434         if (se_num == 0xffffffff)
3435                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3436         else
3437                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3438
3439         if (sh_num == 0xffffffff)
3440                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3441         else
3442                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3443
3444         WREG32(mmGRBM_GFX_INDEX, data);
3445 }
3446
3447 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3448                                   u32 me, u32 pipe, u32 q, u32 vm)
3449 {
3450         vi_srbm_select(adev, me, pipe, q, vm);
3451 }
3452
3453 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3454 {
3455         u32 data, mask;
3456
3457         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3458                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3459
3460         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3461
3462         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3463                                          adev->gfx.config.max_sh_per_se);
3464
3465         return (~data) & mask;
3466 }
3467
3468 static void
3469 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3470 {
3471         switch (adev->asic_type) {
3472         case CHIP_FIJI:
3473         case CHIP_VEGAM:
3474                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3475                           RB_XSEL2(1) | PKR_MAP(2) |
3476                           PKR_XSEL(1) | PKR_YSEL(1) |
3477                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3478                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3479                            SE_PAIR_YSEL(2);
3480                 break;
3481         case CHIP_TONGA:
3482         case CHIP_POLARIS10:
3483                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3484                           SE_XSEL(1) | SE_YSEL(1);
3485                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3486                            SE_PAIR_YSEL(2);
3487                 break;
3488         case CHIP_TOPAZ:
3489         case CHIP_CARRIZO:
3490                 *rconf |= RB_MAP_PKR0(2);
3491                 *rconf1 |= 0x0;
3492                 break;
3493         case CHIP_POLARIS11:
3494         case CHIP_POLARIS12:
3495                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3496                           SE_XSEL(1) | SE_YSEL(1);
3497                 *rconf1 |= 0x0;
3498                 break;
3499         case CHIP_STONEY:
3500                 *rconf |= 0x0;
3501                 *rconf1 |= 0x0;
3502                 break;
3503         default:
3504                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3505                 break;
3506         }
3507 }
3508
3509 static void
3510 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3511                                         u32 raster_config, u32 raster_config_1,
3512                                         unsigned rb_mask, unsigned num_rb)
3513 {
3514         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3515         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3516         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3517         unsigned rb_per_se = num_rb / num_se;
3518         unsigned se_mask[4];
3519         unsigned se;
3520
3521         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3522         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3523         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3524         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3525
3526         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3527         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3528         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3529
3530         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3531                              (!se_mask[2] && !se_mask[3]))) {
3532                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3533
3534                 if (!se_mask[0] && !se_mask[1]) {
3535                         raster_config_1 |=
3536                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3537                 } else {
3538                         raster_config_1 |=
3539                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3540                 }
3541         }
3542
3543         for (se = 0; se < num_se; se++) {
3544                 unsigned raster_config_se = raster_config;
3545                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3546                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3547                 int idx = (se / 2) * 2;
3548
3549                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3550                         raster_config_se &= ~SE_MAP_MASK;
3551
3552                         if (!se_mask[idx]) {
3553                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3554                         } else {
3555                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3556                         }
3557                 }
3558
3559                 pkr0_mask &= rb_mask;
3560                 pkr1_mask &= rb_mask;
3561                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3562                         raster_config_se &= ~PKR_MAP_MASK;
3563
3564                         if (!pkr0_mask) {
3565                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3566                         } else {
3567                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3568                         }
3569                 }
3570
3571                 if (rb_per_se >= 2) {
3572                         unsigned rb0_mask = 1 << (se * rb_per_se);
3573                         unsigned rb1_mask = rb0_mask << 1;
3574
3575                         rb0_mask &= rb_mask;
3576                         rb1_mask &= rb_mask;
3577                         if (!rb0_mask || !rb1_mask) {
3578                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3579
3580                                 if (!rb0_mask) {
3581                                         raster_config_se |=
3582                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3583                                 } else {
3584                                         raster_config_se |=
3585                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3586                                 }
3587                         }
3588
3589                         if (rb_per_se > 2) {
3590                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3591                                 rb1_mask = rb0_mask << 1;
3592                                 rb0_mask &= rb_mask;
3593                                 rb1_mask &= rb_mask;
3594                                 if (!rb0_mask || !rb1_mask) {
3595                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3596
3597                                         if (!rb0_mask) {
3598                                                 raster_config_se |=
3599                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3600                                         } else {
3601                                                 raster_config_se |=
3602                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3603                                         }
3604                                 }
3605                         }
3606                 }
3607
3608                 /* GRBM_GFX_INDEX has a different offset on VI */
3609                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3610                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3611                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3612         }
3613
3614         /* GRBM_GFX_INDEX has a different offset on VI */
3615         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3616 }
3617
3618 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3619 {
3620         int i, j;
3621         u32 data;
3622         u32 raster_config = 0, raster_config_1 = 0;
3623         u32 active_rbs = 0;
3624         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3625                                         adev->gfx.config.max_sh_per_se;
3626         unsigned num_rb_pipes;
3627
3628         mutex_lock(&adev->grbm_idx_mutex);
3629         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3630                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3631                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3632                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3633                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3634                                                rb_bitmap_width_per_sh);
3635                 }
3636         }
3637         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3638
3639         adev->gfx.config.backend_enable_mask = active_rbs;
3640         adev->gfx.config.num_rbs = hweight32(active_rbs);
3641
3642         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3643                              adev->gfx.config.max_shader_engines, 16);
3644
3645         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3646
3647         if (!adev->gfx.config.backend_enable_mask ||
3648                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3649                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3650                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3651         } else {
3652                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3653                                                         adev->gfx.config.backend_enable_mask,
3654                                                         num_rb_pipes);
3655         }
3656
3657         /* cache the values for userspace */
3658         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3659                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3660                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3661                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3662                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3663                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3664                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3665                         adev->gfx.config.rb_config[i][j].raster_config =
3666                                 RREG32(mmPA_SC_RASTER_CONFIG);
3667                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3668                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3669                 }
3670         }
3671         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3672         mutex_unlock(&adev->grbm_idx_mutex);
3673 }
3674
3675 #define DEFAULT_SH_MEM_BASES    (0x6000)
3676 /**
3677  * gfx_v8_0_init_compute_vmid - gart enable
3678  *
3679  * @adev: amdgpu_device pointer
3680  *
3681  * Initialize compute vmid sh_mem registers
3682  *
3683  */
3684 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3685 {
3686         int i;
3687         uint32_t sh_mem_config;
3688         uint32_t sh_mem_bases;
3689
3690         /*
3691          * Configure apertures:
3692          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3693          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3694          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3695          */
3696         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3697
3698         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3699                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3700                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3701                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3702                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3703                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3704
3705         mutex_lock(&adev->srbm_mutex);
3706         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3707                 vi_srbm_select(adev, 0, 0, 0, i);
3708                 /* CP and shaders */
3709                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3710                 WREG32(mmSH_MEM_APE1_BASE, 1);
3711                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3712                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3713         }
3714         vi_srbm_select(adev, 0, 0, 0, 0);
3715         mutex_unlock(&adev->srbm_mutex);
3716
3717         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3718            access. These should be enabled by FW for target VMIDs. */
3719         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3720                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3721                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3722                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3723                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3724         }
3725 }
3726
3727 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3728 {
3729         int vmid;
3730
3731         /*
3732          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3733          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3734          * the driver can enable them for graphics. VMID0 should maintain
3735          * access so that HWS firmware can save/restore entries.
3736          */
3737         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3738                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3739                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3740                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3741                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3742         }
3743 }
3744
3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3746 {
3747         switch (adev->asic_type) {
3748         default:
3749                 adev->gfx.config.double_offchip_lds_buf = 1;
3750                 break;
3751         case CHIP_CARRIZO:
3752         case CHIP_STONEY:
3753                 adev->gfx.config.double_offchip_lds_buf = 0;
3754                 break;
3755         }
3756 }
3757
3758 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3759 {
3760         u32 tmp, sh_static_mem_cfg;
3761         int i;
3762
3763         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3764         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3766         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3767
3768         gfx_v8_0_tiling_mode_table_init(adev);
3769         gfx_v8_0_setup_rb(adev);
3770         gfx_v8_0_get_cu_info(adev);
3771         gfx_v8_0_config_init(adev);
3772
3773         /* XXX SH_MEM regs */
3774         /* where to put LDS, scratch, GPUVM in FSA64 space */
3775         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3776                                    SWIZZLE_ENABLE, 1);
3777         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3778                                    ELEMENT_SIZE, 1);
3779         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3780                                    INDEX_STRIDE, 3);
3781         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3782
3783         mutex_lock(&adev->srbm_mutex);
3784         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3785                 vi_srbm_select(adev, 0, 0, 0, i);
3786                 /* CP and shaders */
3787                 if (i == 0) {
3788                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3790                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3791                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3792                         WREG32(mmSH_MEM_CONFIG, tmp);
3793                         WREG32(mmSH_MEM_BASES, 0);
3794                 } else {
3795                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3796                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3797                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3798                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3799                         WREG32(mmSH_MEM_CONFIG, tmp);
3800                         tmp = adev->gmc.shared_aperture_start >> 48;
3801                         WREG32(mmSH_MEM_BASES, tmp);
3802                 }
3803
3804                 WREG32(mmSH_MEM_APE1_BASE, 1);
3805                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3806         }
3807         vi_srbm_select(adev, 0, 0, 0, 0);
3808         mutex_unlock(&adev->srbm_mutex);
3809
3810         gfx_v8_0_init_compute_vmid(adev);
3811         gfx_v8_0_init_gds_vmid(adev);
3812
3813         mutex_lock(&adev->grbm_idx_mutex);
3814         /*
3815          * making sure that the following register writes will be broadcasted
3816          * to all the shaders
3817          */
3818         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3819
3820         WREG32(mmPA_SC_FIFO_SIZE,
3821                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3822                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3823                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3824                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3825                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3826                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3827                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3828                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3829
3830         tmp = RREG32(mmSPI_ARB_PRIORITY);
3831         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3832         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3833         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3834         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3835         WREG32(mmSPI_ARB_PRIORITY, tmp);
3836
3837         mutex_unlock(&adev->grbm_idx_mutex);
3838
3839 }
3840
3841 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3842 {
3843         u32 i, j, k;
3844         u32 mask;
3845
3846         mutex_lock(&adev->grbm_idx_mutex);
3847         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3848                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3849                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3850                         for (k = 0; k < adev->usec_timeout; k++) {
3851                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3852                                         break;
3853                                 udelay(1);
3854                         }
3855                         if (k == adev->usec_timeout) {
3856                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3857                                                       0xffffffff, 0xffffffff);
3858                                 mutex_unlock(&adev->grbm_idx_mutex);
3859                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3860                                          i, j);
3861                                 return;
3862                         }
3863                 }
3864         }
3865         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3866         mutex_unlock(&adev->grbm_idx_mutex);
3867
3868         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3869                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3870                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3871                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3872         for (k = 0; k < adev->usec_timeout; k++) {
3873                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3874                         break;
3875                 udelay(1);
3876         }
3877 }
3878
3879 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3880                                                bool enable)
3881 {
3882         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3883
3884         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3885         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3886         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3887         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3888
3889         WREG32(mmCP_INT_CNTL_RING0, tmp);
3890 }
3891
3892 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3893 {
3894         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3895         /* csib */
3896         WREG32(mmRLC_CSIB_ADDR_HI,
3897                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3898         WREG32(mmRLC_CSIB_ADDR_LO,
3899                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3900         WREG32(mmRLC_CSIB_LENGTH,
3901                         adev->gfx.rlc.clear_state_size);
3902 }
3903
/*
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: position in @register_list_format where parsing starts
 * @list_size: number of elements in @register_list_format
 * @unique_indices: receives every distinct index word, in order of first
 *	appearance
 * @indices_count: in/out number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: receives the start position of every sub-list
 * @offset_count: in/out number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is walked as sub-lists separated by 0xFFFFFFFF words.  Inside a
 * sub-list, entries are three words long; the third word of each entry is
 * looked up in (or appended to) @unique_indices and then overwritten with
 * its position in that table.
 *
 * BUG()s as soon as either counter reaches its capacity.  Parsing stops
 * silently if an entry's third word would lie at or beyond @list_size, so a
 * truncated list can never cause an access outside @register_list_format.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* first word of a sub-list: remember where it starts */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* separator: the next word opens a new sub-list */
			new_entry = true;
			continue;
		}

		/* step to the third word of this entry, the index word */
		ind_offset += 2;

		/*
		 * A truncated entry would place the index word outside the
		 * list; stop instead of reading and writing past the buffer.
		 */
		if (ind_offset >= list_size)
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* not seen before: append it to the table */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index word by its table position */
		register_list_format[ind_offset] = indices;
	}
}
3953
3954 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3955 {
3956         int i, temp, data;
3957         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3958         int indices_count = 0;
3959         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3960         int offset_count = 0;
3961
3962         int list_size;
3963         unsigned int *register_list_format =
3964                 kmemdup(adev->gfx.rlc.register_list_format,
3965                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3966         if (!register_list_format)
3967                 return -ENOMEM;
3968
3969         gfx_v8_0_parse_ind_reg_list(register_list_format,
3970                                 RLC_FormatDirectRegListLength,
3971                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3972                                 unique_indices,
3973                                 &indices_count,
3974                                 ARRAY_SIZE(unique_indices),
3975                                 indirect_start_offsets,
3976                                 &offset_count,
3977                                 ARRAY_SIZE(indirect_start_offsets));
3978
3979         /* save and restore list */
3980         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3981
3982         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3983         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3984                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3985
3986         /* indirect list */
3987         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3988         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3989                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3990
3991         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3992         list_size = list_size >> 1;
3993         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3994         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3995
3996         /* starting offsets starts */
3997         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3998                 adev->gfx.rlc.starting_offsets_start);
3999         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4000                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4001                                 indirect_start_offsets[i]);
4002
4003         /* unique indices */
4004         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4005         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4006         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4007                 if (unique_indices[i] != 0) {
4008                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4009                         WREG32(data + i, unique_indices[i] >> 20);
4010                 }
4011         }
4012         kfree(register_list_format);
4013
4014         return 0;
4015 }
4016
4017 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4018 {
4019         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4020 }
4021
4022 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4023 {
4024         uint32_t data;
4025
4026         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4027
4028         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4029         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4030         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4031         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4032         WREG32(mmRLC_PG_DELAY, data);
4033
4034         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4035         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4036
4037 }
4038
4039 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4040                                                 bool enable)
4041 {
4042         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4043 }
4044
4045 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4046                                                   bool enable)
4047 {
4048         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4049 }
4050
4051 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4052 {
4053         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4054 }
4055
4056 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4057 {
4058         if ((adev->asic_type == CHIP_CARRIZO) ||
4059             (adev->asic_type == CHIP_STONEY)) {
4060                 gfx_v8_0_init_csb(adev);
4061                 gfx_v8_0_init_save_restore_list(adev);
4062                 gfx_v8_0_enable_save_restore_machine(adev);
4063                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4064                 gfx_v8_0_init_power_gating(adev);
4065                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4066         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4067                    (adev->asic_type == CHIP_POLARIS12) ||
4068                    (adev->asic_type == CHIP_VEGAM)) {
4069                 gfx_v8_0_init_csb(adev);
4070                 gfx_v8_0_init_save_restore_list(adev);
4071                 gfx_v8_0_enable_save_restore_machine(adev);
4072                 gfx_v8_0_init_power_gating(adev);
4073         }
4074
4075 }
4076
4077 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4078 {
4079         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4080
4081         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4082         gfx_v8_0_wait_for_rlc_serdes(adev);
4083 }
4084
4085 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4086 {
4087         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4088         udelay(50);
4089
4090         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4091         udelay(50);
4092 }
4093
4094 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4095 {
4096         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4097
4098         /* carrizo do enable cp interrupt after cp inited */
4099         if (!(adev->flags & AMD_IS_APU))
4100                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4101
4102         udelay(50);
4103 }
4104
4105 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4106 {
4107         if (amdgpu_sriov_vf(adev)) {
4108                 gfx_v8_0_init_csb(adev);
4109                 return 0;
4110         }
4111
4112         adev->gfx.rlc.funcs->stop(adev);
4113         adev->gfx.rlc.funcs->reset(adev);
4114         gfx_v8_0_init_pg(adev);
4115         adev->gfx.rlc.funcs->start(adev);
4116
4117         return 0;
4118 }
4119
4120 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4121 {
4122         u32 tmp = RREG32(mmCP_ME_CNTL);
4123
4124         if (enable) {
4125                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4126                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4127                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4128         } else {
4129                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4130                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4131                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4132         }
4133         WREG32(mmCP_ME_CNTL, tmp);
4134         udelay(50);
4135 }
4136
4137 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4138 {
4139         u32 count = 0;
4140         const struct cs_section_def *sect = NULL;
4141         const struct cs_extent_def *ext = NULL;
4142
4143         /* begin clear state */
4144         count += 2;
4145         /* context control state */
4146         count += 3;
4147
4148         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4149                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4150                         if (sect->id == SECT_CONTEXT)
4151                                 count += 2 + ext->reg_count;
4152                         else
4153                                 return 0;
4154                 }
4155         }
4156         /* pa_sc_raster_config/pa_sc_raster_config1 */
4157         count += 4;
4158         /* end clear state */
4159         count += 2;
4160         /* clear state */
4161         count += 2;
4162
4163         return count;
4164 }
4165
4166 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4167 {
4168         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4169         const struct cs_section_def *sect = NULL;
4170         const struct cs_extent_def *ext = NULL;
4171         int r, i;
4172
4173         /* init the CP */
4174         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4175         WREG32(mmCP_ENDIAN_SWAP, 0);
4176         WREG32(mmCP_DEVICE_ID, 1);
4177
4178         gfx_v8_0_cp_gfx_enable(adev, true);
4179
4180         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4181         if (r) {
4182                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4183                 return r;
4184         }
4185
4186         /* clear state buffer */
4187         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4188         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4189
4190         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4191         amdgpu_ring_write(ring, 0x80000000);
4192         amdgpu_ring_write(ring, 0x80000000);
4193
4194         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4195                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4196                         if (sect->id == SECT_CONTEXT) {
4197                                 amdgpu_ring_write(ring,
4198                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4199                                                ext->reg_count));
4200                                 amdgpu_ring_write(ring,
4201                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4202                                 for (i = 0; i < ext->reg_count; i++)
4203                                         amdgpu_ring_write(ring, ext->extent[i]);
4204                         }
4205                 }
4206         }
4207
4208         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4209         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4210         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4211         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4212
4213         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4214         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4215
4216         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4217         amdgpu_ring_write(ring, 0);
4218
4219         /* init the CE partitions */
4220         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4221         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4222         amdgpu_ring_write(ring, 0x8000);
4223         amdgpu_ring_write(ring, 0x8000);
4224
4225         amdgpu_ring_commit(ring);
4226
4227         return 0;
4228 }
4229 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4230 {
4231         u32 tmp;
4232         /* no gfx doorbells on iceland */
4233         if (adev->asic_type == CHIP_TOPAZ)
4234                 return;
4235
4236         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4237
4238         if (ring->use_doorbell) {
4239                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4240                                 DOORBELL_OFFSET, ring->doorbell_index);
4241                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4242                                                 DOORBELL_HIT, 0);
4243                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4244                                             DOORBELL_EN, 1);
4245         } else {
4246                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4247         }
4248
4249         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4250
4251         if (adev->flags & AMD_IS_APU)
4252                 return;
4253
4254         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4255                                         DOORBELL_RANGE_LOWER,
4256                                         adev->doorbell_index.gfx_ring0);
4257         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4258
4259         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4260                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4261 }
4262
4263 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4264 {
4265         struct amdgpu_ring *ring;
4266         u32 tmp;
4267         u32 rb_bufsz;
4268         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4269
4270         /* Set the write pointer delay */
4271         WREG32(mmCP_RB_WPTR_DELAY, 0);
4272
4273         /* set the RB to use vmid 0 */
4274         WREG32(mmCP_RB_VMID, 0);
4275
4276         /* Set ring buffer size */
4277         ring = &adev->gfx.gfx_ring[0];
4278         rb_bufsz = order_base_2(ring->ring_size / 8);
4279         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4280         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4281         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4282         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4283 #ifdef __BIG_ENDIAN
4284         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4285 #endif
4286         WREG32(mmCP_RB0_CNTL, tmp);
4287
4288         /* Initialize the ring buffer's read and write pointers */
4289         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4290         ring->wptr = 0;
4291         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4292
4293         /* set the wb address wether it's enabled or not */
4294         rptr_addr = ring->rptr_gpu_addr;
4295         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4296         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4297
4298         wptr_gpu_addr = ring->wptr_gpu_addr;
4299         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4300         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4301         mdelay(1);
4302         WREG32(mmCP_RB0_CNTL, tmp);
4303
4304         rb_addr = ring->gpu_addr >> 8;
4305         WREG32(mmCP_RB0_BASE, rb_addr);
4306         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4307
4308         gfx_v8_0_set_cpg_door_bell(adev, ring);
4309         /* start the ring */
4310         amdgpu_ring_clear_ring(ring);
4311         gfx_v8_0_cp_gfx_start(adev);
4312         ring->sched.ready = true;
4313
4314         return 0;
4315 }
4316
4317 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4318 {
4319         if (enable) {
4320                 WREG32(mmCP_MEC_CNTL, 0);
4321         } else {
4322                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4323                 adev->gfx.kiq.ring.sched.ready = false;
4324         }
4325         udelay(50);
4326 }
4327
4328 /* KIQ functions */
4329 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4330 {
4331         uint32_t tmp;
4332         struct amdgpu_device *adev = ring->adev;
4333
4334         /* tell RLC which is KIQ queue */
4335         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4336         tmp &= 0xffffff00;
4337         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4338         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4339         tmp |= 0x80;
4340         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4341 }
4342
4343 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4344 {
4345         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4346         uint64_t queue_mask = 0;
4347         int r, i;
4348
4349         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4350                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4351                         continue;
4352
4353                 /* This situation may be hit in the future if a new HW
4354                  * generation exposes more than 64 queues. If so, the
4355                  * definition of queue_mask needs updating */
4356                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4357                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4358                         break;
4359                 }
4360
4361                 queue_mask |= (1ull << i);
4362         }
4363
4364         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4365         if (r) {
4366                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4367                 return r;
4368         }
4369         /* set resources */
4370         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4371         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4372         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4373         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4374         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4375         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4376         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4377         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4378         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4379                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4380                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4381                 uint64_t wptr_addr = ring->wptr_gpu_addr;
4382
4383                 /* map queues */
4384                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4385                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4386                 amdgpu_ring_write(kiq_ring,
4387                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4388                 amdgpu_ring_write(kiq_ring,
4389                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4390                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4391                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4392                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4393                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4394                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4395                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4396                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4397         }
4398
4399         amdgpu_ring_commit(kiq_ring);
4400
4401         return 0;
4402 }
4403
4404 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4405 {
4406         int i, r = 0;
4407
4408         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4409                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4410                 for (i = 0; i < adev->usec_timeout; i++) {
4411                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4412                                 break;
4413                         udelay(1);
4414                 }
4415                 if (i == adev->usec_timeout)
4416                         r = -ETIMEDOUT;
4417         }
4418         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4419         WREG32(mmCP_HQD_PQ_RPTR, 0);
4420         WREG32(mmCP_HQD_PQ_WPTR, 0);
4421
4422         return r;
4423 }
4424
4425 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4426 {
4427         struct amdgpu_device *adev = ring->adev;
4428
4429         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4430                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4431                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4432                         mqd->cp_hqd_queue_priority =
4433                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4434                 }
4435         }
4436 }
4437
4438 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4439 {
4440         struct amdgpu_device *adev = ring->adev;
4441         struct vi_mqd *mqd = ring->mqd_ptr;
4442         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4443         uint32_t tmp;
4444
4445         mqd->header = 0xC0310800;
4446         mqd->compute_pipelinestat_enable = 0x00000001;
4447         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4448         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4449         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4450         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4451         mqd->compute_misc_reserved = 0x00000003;
4452         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4453                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4454         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4455                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4456         eop_base_addr = ring->eop_gpu_addr >> 8;
4457         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4458         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4459
4460         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4461         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4462         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4463                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4464
4465         mqd->cp_hqd_eop_control = tmp;
4466
4467         /* enable doorbell? */
4468         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4469                             CP_HQD_PQ_DOORBELL_CONTROL,
4470                             DOORBELL_EN,
4471                             ring->use_doorbell ? 1 : 0);
4472
4473         mqd->cp_hqd_pq_doorbell_control = tmp;
4474
4475         /* set the pointer to the MQD */
4476         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4477         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4478
4479         /* set MQD vmid to 0 */
4480         tmp = RREG32(mmCP_MQD_CONTROL);
4481         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4482         mqd->cp_mqd_control = tmp;
4483
4484         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4485         hqd_gpu_addr = ring->gpu_addr >> 8;
4486         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4487         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4488
4489         /* set up the HQD, this is similar to CP_RB0_CNTL */
4490         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4491         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4492                             (order_base_2(ring->ring_size / 4) - 1));
4493         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4494                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4495 #ifdef __BIG_ENDIAN
4496         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4497 #endif
4498         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4499         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4500         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4501         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4502         mqd->cp_hqd_pq_control = tmp;
4503
4504         /* set the wb address whether it's enabled or not */
4505         wb_gpu_addr = ring->rptr_gpu_addr;
4506         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4507         mqd->cp_hqd_pq_rptr_report_addr_hi =
4508                 upper_32_bits(wb_gpu_addr) & 0xffff;
4509
4510         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4511         wb_gpu_addr = ring->wptr_gpu_addr;
4512         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4513         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4514
4515         tmp = 0;
4516         /* enable the doorbell if requested */
4517         if (ring->use_doorbell) {
4518                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4519                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4520                                 DOORBELL_OFFSET, ring->doorbell_index);
4521
4522                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4523                                          DOORBELL_EN, 1);
4524                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4525                                          DOORBELL_SOURCE, 0);
4526                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4527                                          DOORBELL_HIT, 0);
4528         }
4529
4530         mqd->cp_hqd_pq_doorbell_control = tmp;
4531
4532         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4533         ring->wptr = 0;
4534         mqd->cp_hqd_pq_wptr = ring->wptr;
4535         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4536
4537         /* set the vmid for the queue */
4538         mqd->cp_hqd_vmid = 0;
4539
4540         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4541         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4542         mqd->cp_hqd_persistent_state = tmp;
4543
4544         /* set MTYPE */
4545         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4546         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4547         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4548         mqd->cp_hqd_ib_control = tmp;
4549
4550         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4551         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4552         mqd->cp_hqd_iq_timer = tmp;
4553
4554         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4555         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4556         mqd->cp_hqd_ctx_save_control = tmp;
4557
4558         /* defaults */
4559         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4560         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4561         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4562         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4563         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4564         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4565         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4566         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4567         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4568         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4569         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4570         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4571
4572         /* set static priority for a queue/ring */
4573         gfx_v8_0_mqd_set_priority(ring, mqd);
4574         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4575
4576         /* map_queues packet doesn't need activate the queue,
4577          * so only kiq need set this field.
4578          */
4579         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4580                 mqd->cp_hqd_active = 1;
4581
4582         return 0;
4583 }
4584
4585 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4586                         struct vi_mqd *mqd)
4587 {
4588         uint32_t mqd_reg;
4589         uint32_t *mqd_data;
4590
4591         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4592         mqd_data = &mqd->cp_mqd_base_addr_lo;
4593
4594         /* disable wptr polling */
4595         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4596
4597         /* program all HQD registers */
4598         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4599                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4600
4601         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4602          * This is safe since EOP RPTR==WPTR for any inactive HQD
4603          * on ASICs that do not support context-save.
4604          * EOP writes/reads can start anywhere in the ring.
4605          */
4606         if (adev->asic_type != CHIP_TONGA) {
4607                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4608                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4609                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4610         }
4611
4612         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4613                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4614
4615         /* activate the HQD */
4616         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4617                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4618
4619         return 0;
4620 }
4621
4622 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4623 {
4624         struct amdgpu_device *adev = ring->adev;
4625         struct vi_mqd *mqd = ring->mqd_ptr;
4626         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4627
4628         gfx_v8_0_kiq_setting(ring);
4629
4630         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4631                 /* reset MQD to a clean status */
4632                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4633                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4634
4635                 /* reset ring buffer */
4636                 ring->wptr = 0;
4637                 amdgpu_ring_clear_ring(ring);
4638                 mutex_lock(&adev->srbm_mutex);
4639                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4640                 gfx_v8_0_mqd_commit(adev, mqd);
4641                 vi_srbm_select(adev, 0, 0, 0, 0);
4642                 mutex_unlock(&adev->srbm_mutex);
4643         } else {
4644                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4645                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4646                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4647                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4648                         amdgpu_ring_clear_ring(ring);
4649                 mutex_lock(&adev->srbm_mutex);
4650                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4651                 gfx_v8_0_mqd_init(ring);
4652                 gfx_v8_0_mqd_commit(adev, mqd);
4653                 vi_srbm_select(adev, 0, 0, 0, 0);
4654                 mutex_unlock(&adev->srbm_mutex);
4655
4656                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4657                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4658         }
4659
4660         return 0;
4661 }
4662
4663 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4664 {
4665         struct amdgpu_device *adev = ring->adev;
4666         struct vi_mqd *mqd = ring->mqd_ptr;
4667         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4668
4669         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4670                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4671                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4672                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4673                 mutex_lock(&adev->srbm_mutex);
4674                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4675                 gfx_v8_0_mqd_init(ring);
4676                 vi_srbm_select(adev, 0, 0, 0, 0);
4677                 mutex_unlock(&adev->srbm_mutex);
4678
4679                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4680                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4681         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4682                 /* reset MQD to a clean status */
4683                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4684                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4685                 /* reset ring buffer */
4686                 ring->wptr = 0;
4687                 amdgpu_ring_clear_ring(ring);
4688         } else {
4689                 amdgpu_ring_clear_ring(ring);
4690         }
4691         return 0;
4692 }
4693
4694 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4695 {
4696         if (adev->asic_type > CHIP_TONGA) {
4697                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4698                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4699         }
4700         /* enable doorbells */
4701         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4702 }
4703
4704 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4705 {
4706         struct amdgpu_ring *ring;
4707         int r;
4708
4709         ring = &adev->gfx.kiq.ring;
4710
4711         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4712         if (unlikely(r != 0))
4713                 return r;
4714
4715         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4716         if (unlikely(r != 0))
4717                 return r;
4718
4719         gfx_v8_0_kiq_init_queue(ring);
4720         amdgpu_bo_kunmap(ring->mqd_obj);
4721         ring->mqd_ptr = NULL;
4722         amdgpu_bo_unreserve(ring->mqd_obj);
4723         ring->sched.ready = true;
4724         return 0;
4725 }
4726
4727 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4728 {
4729         struct amdgpu_ring *ring = NULL;
4730         int r = 0, i;
4731
4732         gfx_v8_0_cp_compute_enable(adev, true);
4733
4734         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4735                 ring = &adev->gfx.compute_ring[i];
4736
4737                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4738                 if (unlikely(r != 0))
4739                         goto done;
4740                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4741                 if (!r) {
4742                         r = gfx_v8_0_kcq_init_queue(ring);
4743                         amdgpu_bo_kunmap(ring->mqd_obj);
4744                         ring->mqd_ptr = NULL;
4745                 }
4746                 amdgpu_bo_unreserve(ring->mqd_obj);
4747                 if (r)
4748                         goto done;
4749         }
4750
4751         gfx_v8_0_set_mec_doorbell_range(adev);
4752
4753         r = gfx_v8_0_kiq_kcq_enable(adev);
4754         if (r)
4755                 goto done;
4756
4757 done:
4758         return r;
4759 }
4760
4761 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4762 {
4763         int r, i;
4764         struct amdgpu_ring *ring;
4765
4766         /* collect all the ring_tests here, gfx, kiq, compute */
4767         ring = &adev->gfx.gfx_ring[0];
4768         r = amdgpu_ring_test_helper(ring);
4769         if (r)
4770                 return r;
4771
4772         ring = &adev->gfx.kiq.ring;
4773         r = amdgpu_ring_test_helper(ring);
4774         if (r)
4775                 return r;
4776
4777         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4778                 ring = &adev->gfx.compute_ring[i];
4779                 amdgpu_ring_test_helper(ring);
4780         }
4781
4782         return 0;
4783 }
4784
4785 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4786 {
4787         int r;
4788
4789         if (!(adev->flags & AMD_IS_APU))
4790                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4791
4792         r = gfx_v8_0_kiq_resume(adev);
4793         if (r)
4794                 return r;
4795
4796         r = gfx_v8_0_cp_gfx_resume(adev);
4797         if (r)
4798                 return r;
4799
4800         r = gfx_v8_0_kcq_resume(adev);
4801         if (r)
4802                 return r;
4803
4804         r = gfx_v8_0_cp_test_all_rings(adev);
4805         if (r)
4806                 return r;
4807
4808         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4809
4810         return 0;
4811 }
4812
4813 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4814 {
4815         gfx_v8_0_cp_gfx_enable(adev, enable);
4816         gfx_v8_0_cp_compute_enable(adev, enable);
4817 }
4818
4819 static int gfx_v8_0_hw_init(void *handle)
4820 {
4821         int r;
4822         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4823
4824         gfx_v8_0_init_golden_registers(adev);
4825         gfx_v8_0_constants_init(adev);
4826
4827         r = adev->gfx.rlc.funcs->resume(adev);
4828         if (r)
4829                 return r;
4830
4831         r = gfx_v8_0_cp_resume(adev);
4832
4833         return r;
4834 }
4835
4836 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4837 {
4838         int r, i;
4839         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4840
4841         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4842         if (r)
4843                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4844
4845         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4846                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4847
4848                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4849                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4850                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4851                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4852                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4853                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4854                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4855                 amdgpu_ring_write(kiq_ring, 0);
4856                 amdgpu_ring_write(kiq_ring, 0);
4857                 amdgpu_ring_write(kiq_ring, 0);
4858         }
4859         r = amdgpu_ring_test_helper(kiq_ring);
4860         if (r)
4861                 DRM_ERROR("KCQ disable failed\n");
4862
4863         return r;
4864 }
4865
4866 static bool gfx_v8_0_is_idle(void *handle)
4867 {
4868         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4869
4870         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4871                 || RREG32(mmGRBM_STATUS2) != 0x8)
4872                 return false;
4873         else
4874                 return true;
4875 }
4876
4877 static bool gfx_v8_0_rlc_is_idle(void *handle)
4878 {
4879         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4880
4881         if (RREG32(mmGRBM_STATUS2) != 0x8)
4882                 return false;
4883         else
4884                 return true;
4885 }
4886
4887 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4888 {
4889         unsigned int i;
4890         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4891
4892         for (i = 0; i < adev->usec_timeout; i++) {
4893                 if (gfx_v8_0_rlc_is_idle(handle))
4894                         return 0;
4895
4896                 udelay(1);
4897         }
4898         return -ETIMEDOUT;
4899 }
4900
4901 static int gfx_v8_0_wait_for_idle(void *handle)
4902 {
4903         unsigned int i;
4904         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4905
4906         for (i = 0; i < adev->usec_timeout; i++) {
4907                 if (gfx_v8_0_is_idle(handle))
4908                         return 0;
4909
4910                 udelay(1);
4911         }
4912         return -ETIMEDOUT;
4913 }
4914
4915 static int gfx_v8_0_hw_fini(void *handle)
4916 {
4917         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4918
4919         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4920         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4921
4922         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4923
4924         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4925
4926         /* disable KCQ to avoid CPC touch memory not valid anymore */
4927         gfx_v8_0_kcq_disable(adev);
4928
4929         if (amdgpu_sriov_vf(adev)) {
4930                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4931                 return 0;
4932         }
4933         amdgpu_gfx_rlc_enter_safe_mode(adev);
4934         if (!gfx_v8_0_wait_for_idle(adev))
4935                 gfx_v8_0_cp_enable(adev, false);
4936         else
4937                 pr_err("cp is busy, skip halt cp\n");
4938         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4939                 adev->gfx.rlc.funcs->stop(adev);
4940         else
4941                 pr_err("rlc is busy, skip halt rlc\n");
4942         amdgpu_gfx_rlc_exit_safe_mode(adev);
4943
4944         return 0;
4945 }
4946
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4951
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4956
4957 static bool gfx_v8_0_check_soft_reset(void *handle)
4958 {
4959         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4960         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961         u32 tmp;
4962
4963         /* GRBM_STATUS */
4964         tmp = RREG32(mmGRBM_STATUS);
4965         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4966                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4967                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4968                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4969                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4970                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4971                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4972                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4973                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4974                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4975                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4976                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4977                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4978         }
4979
4980         /* GRBM_STATUS2 */
4981         tmp = RREG32(mmGRBM_STATUS2);
4982         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4983                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4985
4986         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4987             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4988             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4989                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4990                                                 SOFT_RESET_CPF, 1);
4991                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4992                                                 SOFT_RESET_CPC, 1);
4993                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4994                                                 SOFT_RESET_CPG, 1);
4995                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4996                                                 SOFT_RESET_GRBM, 1);
4997         }
4998
4999         /* SRBM_STATUS */
5000         tmp = RREG32(mmSRBM_STATUS);
5001         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5002                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5004         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5005                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5006                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5007
5008         if (grbm_soft_reset || srbm_soft_reset) {
5009                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5010                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5011                 return true;
5012         } else {
5013                 adev->gfx.grbm_soft_reset = 0;
5014                 adev->gfx.srbm_soft_reset = 0;
5015                 return false;
5016         }
5017 }
5018
5019 static int gfx_v8_0_pre_soft_reset(void *handle)
5020 {
5021         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022         u32 grbm_soft_reset = 0;
5023
5024         if ((!adev->gfx.grbm_soft_reset) &&
5025             (!adev->gfx.srbm_soft_reset))
5026                 return 0;
5027
5028         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5029
5030         /* stop the rlc */
5031         adev->gfx.rlc.funcs->stop(adev);
5032
5033         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5034             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5035                 /* Disable GFX parsing/prefetching */
5036                 gfx_v8_0_cp_gfx_enable(adev, false);
5037
5038         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5039             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5040             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5041             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5042                 int i;
5043
5044                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5046
5047                         mutex_lock(&adev->srbm_mutex);
5048                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5049                         gfx_v8_0_deactivate_hqd(adev, 2);
5050                         vi_srbm_select(adev, 0, 0, 0, 0);
5051                         mutex_unlock(&adev->srbm_mutex);
5052                 }
5053                 /* Disable MEC parsing/prefetching */
5054                 gfx_v8_0_cp_compute_enable(adev, false);
5055         }
5056
5057         return 0;
5058 }
5059
5060 static int gfx_v8_0_soft_reset(void *handle)
5061 {
5062         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5063         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5064         u32 tmp;
5065
5066         if ((!adev->gfx.grbm_soft_reset) &&
5067             (!adev->gfx.srbm_soft_reset))
5068                 return 0;
5069
5070         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5071         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5072
5073         if (grbm_soft_reset || srbm_soft_reset) {
5074                 tmp = RREG32(mmGMCON_DEBUG);
5075                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5076                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5077                 WREG32(mmGMCON_DEBUG, tmp);
5078                 udelay(50);
5079         }
5080
5081         if (grbm_soft_reset) {
5082                 tmp = RREG32(mmGRBM_SOFT_RESET);
5083                 tmp |= grbm_soft_reset;
5084                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5085                 WREG32(mmGRBM_SOFT_RESET, tmp);
5086                 tmp = RREG32(mmGRBM_SOFT_RESET);
5087
5088                 udelay(50);
5089
5090                 tmp &= ~grbm_soft_reset;
5091                 WREG32(mmGRBM_SOFT_RESET, tmp);
5092                 tmp = RREG32(mmGRBM_SOFT_RESET);
5093         }
5094
5095         if (srbm_soft_reset) {
5096                 tmp = RREG32(mmSRBM_SOFT_RESET);
5097                 tmp |= srbm_soft_reset;
5098                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5099                 WREG32(mmSRBM_SOFT_RESET, tmp);
5100                 tmp = RREG32(mmSRBM_SOFT_RESET);
5101
5102                 udelay(50);
5103
5104                 tmp &= ~srbm_soft_reset;
5105                 WREG32(mmSRBM_SOFT_RESET, tmp);
5106                 tmp = RREG32(mmSRBM_SOFT_RESET);
5107         }
5108
5109         if (grbm_soft_reset || srbm_soft_reset) {
5110                 tmp = RREG32(mmGMCON_DEBUG);
5111                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5112                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5113                 WREG32(mmGMCON_DEBUG, tmp);
5114         }
5115
5116         /* Wait a little for things to settle down */
5117         udelay(50);
5118
5119         return 0;
5120 }
5121
5122 static int gfx_v8_0_post_soft_reset(void *handle)
5123 {
5124         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5125         u32 grbm_soft_reset = 0;
5126
5127         if ((!adev->gfx.grbm_soft_reset) &&
5128             (!adev->gfx.srbm_soft_reset))
5129                 return 0;
5130
5131         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5132
5133         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5134             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5135             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5136             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5137                 int i;
5138
5139                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5140                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5141
5142                         mutex_lock(&adev->srbm_mutex);
5143                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5144                         gfx_v8_0_deactivate_hqd(adev, 2);
5145                         vi_srbm_select(adev, 0, 0, 0, 0);
5146                         mutex_unlock(&adev->srbm_mutex);
5147                 }
5148                 gfx_v8_0_kiq_resume(adev);
5149                 gfx_v8_0_kcq_resume(adev);
5150         }
5151
5152         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5153             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5154                 gfx_v8_0_cp_gfx_resume(adev);
5155
5156         gfx_v8_0_cp_test_all_rings(adev);
5157
5158         adev->gfx.rlc.funcs->start(adev);
5159
5160         return 0;
5161 }
5162
5163 /**
5164  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5165  *
5166  * @adev: amdgpu_device pointer
5167  *
5168  * Fetches a GPU clock counter snapshot.
5169  * Returns the 64 bit clock counter snapshot.
5170  */
5171 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5172 {
5173         uint64_t clock;
5174
5175         mutex_lock(&adev->gfx.gpu_clock_mutex);
5176         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5177         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5178                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5179         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5180         return clock;
5181 }
5182
5183 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5184                                           uint32_t vmid,
5185                                           uint32_t gds_base, uint32_t gds_size,
5186                                           uint32_t gws_base, uint32_t gws_size,
5187                                           uint32_t oa_base, uint32_t oa_size)
5188 {
5189         /* GDS Base */
5190         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5191         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5192                                 WRITE_DATA_DST_SEL(0)));
5193         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5194         amdgpu_ring_write(ring, 0);
5195         amdgpu_ring_write(ring, gds_base);
5196
5197         /* GDS Size */
5198         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200                                 WRITE_DATA_DST_SEL(0)));
5201         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5202         amdgpu_ring_write(ring, 0);
5203         amdgpu_ring_write(ring, gds_size);
5204
5205         /* GWS */
5206         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5207         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5208                                 WRITE_DATA_DST_SEL(0)));
5209         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5210         amdgpu_ring_write(ring, 0);
5211         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5212
5213         /* OA */
5214         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5215         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5216                                 WRITE_DATA_DST_SEL(0)));
5217         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5218         amdgpu_ring_write(ring, 0);
5219         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5220 }
5221
5222 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5223 {
5224         WREG32(mmSQ_IND_INDEX,
5225                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5226                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5227                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5228                 (SQ_IND_INDEX__FORCE_READ_MASK));
5229         return RREG32(mmSQ_IND_DATA);
5230 }
5231
5232 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5233                            uint32_t wave, uint32_t thread,
5234                            uint32_t regno, uint32_t num, uint32_t *out)
5235 {
5236         WREG32(mmSQ_IND_INDEX,
5237                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5238                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5239                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5240                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5241                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5242                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5243         while (num--)
5244                 *(out++) = RREG32(mmSQ_IND_DATA);
5245 }
5246
5247 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5248 {
5249         /* type 0 wave data */
5250         dst[(*no_fields)++] = 0;
5251         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5252         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5253         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5254         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5255         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5256         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5257         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5258         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5259         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5260         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5261         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5262         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5263         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5264         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5270 }
5271
5272 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5273                                      uint32_t wave, uint32_t start,
5274                                      uint32_t size, uint32_t *dst)
5275 {
5276         wave_read_regs(
5277                 adev, simd, wave, 0,
5278                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5279 }
5280
5281
5282 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5283         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5284         .select_se_sh = &gfx_v8_0_select_se_sh,
5285         .read_wave_data = &gfx_v8_0_read_wave_data,
5286         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5287         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5288 };
5289
5290 static int gfx_v8_0_early_init(void *handle)
5291 {
5292         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5293
5294         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5295         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5296                                           AMDGPU_MAX_COMPUTE_RINGS);
5297         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5298         gfx_v8_0_set_ring_funcs(adev);
5299         gfx_v8_0_set_irq_funcs(adev);
5300         gfx_v8_0_set_gds_init(adev);
5301         gfx_v8_0_set_rlc_funcs(adev);
5302
5303         return 0;
5304 }
5305
5306 static int gfx_v8_0_late_init(void *handle)
5307 {
5308         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5309         int r;
5310
5311         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5312         if (r)
5313                 return r;
5314
5315         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5316         if (r)
5317                 return r;
5318
5319         /* requires IBs so do in late init after IB pool is initialized */
5320         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5321         if (r)
5322                 return r;
5323
5324         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5325         if (r) {
5326                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5327                 return r;
5328         }
5329
5330         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5331         if (r) {
5332                 DRM_ERROR(
5333                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5334                         r);
5335                 return r;
5336         }
5337
5338         return 0;
5339 }
5340
5341 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5342                                                        bool enable)
5343 {
5344         if ((adev->asic_type == CHIP_POLARIS11) ||
5345             (adev->asic_type == CHIP_POLARIS12) ||
5346             (adev->asic_type == CHIP_VEGAM))
5347                 /* Send msg to SMU via Powerplay */
5348                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5349
5350         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5351 }
5352
5353 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5354                                                         bool enable)
5355 {
5356         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5357 }
5358
5359 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5360                 bool enable)
5361 {
5362         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5363 }
5364
5365 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5366                                           bool enable)
5367 {
5368         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5369 }
5370
5371 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5372                                                 bool enable)
5373 {
5374         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5375
5376         /* Read any GFX register to wake up GFX. */
5377         if (!enable)
5378                 RREG32(mmDB_RENDER_CONTROL);
5379 }
5380
5381 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5382                                           bool enable)
5383 {
5384         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5385                 cz_enable_gfx_cg_power_gating(adev, true);
5386                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5387                         cz_enable_gfx_pipeline_power_gating(adev, true);
5388         } else {
5389                 cz_enable_gfx_cg_power_gating(adev, false);
5390                 cz_enable_gfx_pipeline_power_gating(adev, false);
5391         }
5392 }
5393
5394 static int gfx_v8_0_set_powergating_state(void *handle,
5395                                           enum amd_powergating_state state)
5396 {
5397         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5398         bool enable = (state == AMD_PG_STATE_GATE);
5399
5400         if (amdgpu_sriov_vf(adev))
5401                 return 0;
5402
5403         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5404                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5405                                 AMD_PG_SUPPORT_CP |
5406                                 AMD_PG_SUPPORT_GFX_DMG))
5407                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5408         switch (adev->asic_type) {
5409         case CHIP_CARRIZO:
5410         case CHIP_STONEY:
5411
5412                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5413                         cz_enable_sck_slow_down_on_power_up(adev, true);
5414                         cz_enable_sck_slow_down_on_power_down(adev, true);
5415                 } else {
5416                         cz_enable_sck_slow_down_on_power_up(adev, false);
5417                         cz_enable_sck_slow_down_on_power_down(adev, false);
5418                 }
5419                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5420                         cz_enable_cp_power_gating(adev, true);
5421                 else
5422                         cz_enable_cp_power_gating(adev, false);
5423
5424                 cz_update_gfx_cg_power_gating(adev, enable);
5425
5426                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5427                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5428                 else
5429                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5430
5431                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5432                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5433                 else
5434                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5435                 break;
5436         case CHIP_POLARIS11:
5437         case CHIP_POLARIS12:
5438         case CHIP_VEGAM:
5439                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5440                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5441                 else
5442                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5443
5444                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5445                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5446                 else
5447                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5448
5449                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5450                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5451                 else
5452                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5453                 break;
5454         default:
5455                 break;
5456         }
5457         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5458                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5459                                 AMD_PG_SUPPORT_CP |
5460                                 AMD_PG_SUPPORT_GFX_DMG))
5461                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5462         return 0;
5463 }
5464
5465 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5466 {
5467         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5468         int data;
5469
5470         if (amdgpu_sriov_vf(adev))
5471                 *flags = 0;
5472
5473         /* AMD_CG_SUPPORT_GFX_MGCG */
5474         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5475         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5476                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5477
5478         /* AMD_CG_SUPPORT_GFX_CGLG */
5479         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5480         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5481                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5482
5483         /* AMD_CG_SUPPORT_GFX_CGLS */
5484         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5485                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5486
5487         /* AMD_CG_SUPPORT_GFX_CGTS */
5488         data = RREG32(mmCGTS_SM_CTRL_REG);
5489         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5490                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5491
5492         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5493         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5494                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5495
5496         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5497         data = RREG32(mmRLC_MEM_SLP_CNTL);
5498         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5499                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5500
5501         /* AMD_CG_SUPPORT_GFX_CP_LS */
5502         data = RREG32(mmCP_MEM_SLP_CNTL);
5503         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5504                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5505 }
5506
5507 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5508                                      uint32_t reg_addr, uint32_t cmd)
5509 {
5510         uint32_t data;
5511
5512         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5513
5514         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5515         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5516
5517         data = RREG32(mmRLC_SERDES_WR_CTRL);
5518         if (adev->asic_type == CHIP_STONEY)
5519                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5520                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5521                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5522                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5523                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5524                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5525                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5526                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5527                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5528         else
5529                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5530                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5531                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5532                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5533                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5534                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5535                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5536                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5537                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5538                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5539                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5540         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5541                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5542                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5543                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5544
5545         WREG32(mmRLC_SERDES_WR_CTRL, data);
5546 }
5547
/*
 * Safe-mode message codes and RLC_GPR_REG2 field layout (REQ is bit 0,
 * MESSAGE is the four bits above it).
 * NOTE(review): the safe-mode helpers that follow use the RLC_SAFE_MODE__*
 * masks and a literal message value instead of these macros; confirm whether
 * anything else in the file still references them.
 */
5548 #define MSG_ENTER_RLC_SAFE_MODE     1
5549 #define MSG_EXIT_RLC_SAFE_MODE      0
5550 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5551 #define RLC_GPR_REG2__REQ__SHIFT 0
5552 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5553 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5554
5555 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5556 {
5557         uint32_t rlc_setting;
5558
5559         rlc_setting = RREG32(mmRLC_CNTL);
5560         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5561                 return false;
5562
5563         return true;
5564 }
5565
5566 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5567 {
5568         uint32_t data;
5569         unsigned i;
5570         data = RREG32(mmRLC_CNTL);
5571         data |= RLC_SAFE_MODE__CMD_MASK;
5572         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5573         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5574         WREG32(mmRLC_SAFE_MODE, data);
5575
5576         /* wait for RLC_SAFE_MODE */
5577         for (i = 0; i < adev->usec_timeout; i++) {
5578                 if ((RREG32(mmRLC_GPM_STAT) &
5579                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5580                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5581                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5582                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5583                         break;
5584                 udelay(1);
5585         }
5586         for (i = 0; i < adev->usec_timeout; i++) {
5587                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5588                         break;
5589                 udelay(1);
5590         }
5591 }
5592
5593 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5594 {
5595         uint32_t data;
5596         unsigned i;
5597
5598         data = RREG32(mmRLC_CNTL);
5599         data |= RLC_SAFE_MODE__CMD_MASK;
5600         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5601         WREG32(mmRLC_SAFE_MODE, data);
5602
5603         for (i = 0; i < adev->usec_timeout; i++) {
5604                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5605                         break;
5606                 udelay(1);
5607         }
5608 }
5609
5610 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5611 {
5612         u32 data;
5613
5614         amdgpu_gfx_off_ctrl(adev, false);
5615
5616         if (amdgpu_sriov_is_pp_one_vf(adev))
5617                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5618         else
5619                 data = RREG32(mmRLC_SPM_VMID);
5620
5621         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5622         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5623
5624         if (amdgpu_sriov_is_pp_one_vf(adev))
5625                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5626         else
5627                 WREG32(mmRLC_SPM_VMID, data);
5628
5629         amdgpu_gfx_off_ctrl(adev, true);
5630 }
5631
5632 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5633         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5634         .set_safe_mode = gfx_v8_0_set_safe_mode,
5635         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5636         .init = gfx_v8_0_rlc_init,
5637         .get_csb_size = gfx_v8_0_get_csb_size,
5638         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5639         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5640         .resume = gfx_v8_0_rlc_resume,
5641         .stop = gfx_v8_0_rlc_stop,
5642         .reset = gfx_v8_0_rlc_reset,
5643         .start = gfx_v8_0_rlc_start,
5644         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5645 };
5646
/*
 * Enable or disable GFX medium grain clock gating (MGCG) together with the
 * RLC/CP memory light sleep and CGTS (tree shade) settings programmed here.
 *
 * @adev:   device to program
 * @enable: true to request the features, false to force them off
 *
 * The enable sequence runs only when @enable is set AND cg_flags contains
 * AMD_CG_SUPPORT_GFX_MGCG; in every other case the disable sequence runs.
 * The whole update is bracketed by amdgpu_gfx_rlc_enter_safe_mode() and
 * amdgpu_gfx_rlc_exit_safe_mode().  Registers handled with a temp/data pair
 * are written back only when the computed value differs from what was read.
 */
5647 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5648                                                       bool enable)
5649 {
5650         uint32_t temp, data;
5651
5652         amdgpu_gfx_rlc_enter_safe_mode(adev);
5653
5654         /* It is disabled by HW by default */
5655         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5656                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5657                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5658                                 /* 1 - RLC memory Light sleep */
5659                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5660
5661                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                     /* 2 - CP memory Light sleep */
5662                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5663                 }
5664
5665                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5666                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                     /* APUs leave the GRBM override bit as read; all other parts clear it too */
5667                 if (adev->flags & AMD_IS_APU)
5668                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5669                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5670                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5671                 else
5672                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5673                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5674                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5675                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5676
5677                 if (temp != data)
5678                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5679
5680                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5681                 gfx_v8_0_wait_for_rlc_serdes(adev);
5682
5683                 /* 5 - clear mgcg override */
5684                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5685
5686                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5687                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5688                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5689                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5690                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5691                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5692                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                             /* the LS override is lifted only when both MGLS and CGTS_LS are supported */
5693                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5694                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5695                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5696                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5697                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5698                         if (temp != data)
5699                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5700                 }
5701                 udelay(50);
5702
5703                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5704                 gfx_v8_0_wait_for_rlc_serdes(adev);
5705         } else {
                     /* disable path: also taken when enable is set but MGCG is not in cg_flags */
5706                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5707                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5708                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5709                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5710                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5711                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5712                 if (temp != data)
5713                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5714
5715                 /* 2 - disable MGLS in RLC */
5716                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5717                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5718                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5719                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5720                 }
5721
5722                 /* 3 - disable MGLS in CP */
5723                 data = RREG32(mmCP_MEM_SLP_CNTL);
5724                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5725                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5726                         WREG32(mmCP_MEM_SLP_CNTL, data);
5727                 }
5728
5729                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5730                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5731                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5732                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5733                 if (temp != data)
5734                         WREG32(mmCGTS_SM_CTRL_REG, data);
5735
5736                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5737                 gfx_v8_0_wait_for_rlc_serdes(adev);
5738
5739                 /* 6 - set mgcg override */
5740                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5741
5742                 udelay(50);
5743
5744                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5745                 gfx_v8_0_wait_for_rlc_serdes(adev);
5746         }
5747
5748         amdgpu_gfx_rlc_exit_safe_mode(adev);
5749 }
5750
/*
 * Enable or disable GFX coarse grain clock gating (CGCG) and, with it,
 * coarse grain light sleep (CGLS).
 *
 * @adev:   device to program
 * @enable: true to request the features, false to force them off
 *
 * The enable sequence runs only when @enable is set AND cg_flags contains
 * AMD_CG_SUPPORT_GFX_CGCG; in every other case the disable sequence runs.
 * RLC_CGCG_CGLS_CTRL is sampled before amdgpu_gfx_rlc_enter_safe_mode() is
 * called; everything else happens between enter and exit of safe mode.
 * Registers handled with a temp/data pair are written back only when the
 * computed value differs from what was read.
 */
5751 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5752                                                       bool enable)
5753 {
5754         uint32_t temp, temp1, data, data1;
5755
5756         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5757
5758         amdgpu_gfx_rlc_enter_safe_mode(adev);
5759
5760         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                     /* lift the CGCG override bit */
5761                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5762                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5763                 if (temp1 != data1)
5764                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5765
5766                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5767                 gfx_v8_0_wait_for_rlc_serdes(adev);
5768
5769                 /* 2 - clear cgcg override */
5770                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5771
5772                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5773                 gfx_v8_0_wait_for_rlc_serdes(adev);
5774
5775                 /* 3 - write cmd to set CGLS */
                     /*
                      * NOTE(review): this command is sent whether or not cg_flags
                      * has AMD_CG_SUPPORT_GFX_CGLS; only the CGLS_EN bit below
                      * depends on that flag - confirm this is intended.
                      */
5776                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5777
5778                 /* 4 - enable cgcg */
5779                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5780
5781                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5782                         /* enable cgls */
5783                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5784
                             /* and lift the CGLS override bit */
5785                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5786                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5787
5788                         if (temp1 != data1)
5789                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5790                 } else {
5791                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5792                 }
5793
5794                 if (temp != data)
5795                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5796
5797                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5798                  * Cmp_busy/GFX_Idle interrupts
5799                  */
5800                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5801         } else {
                     /* disable path: also taken when enable is set but CGCG is not in cg_flags */
5802                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5803                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5804
5805                 /* TEST CGCG */
5806                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5807                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5808                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5809                 if (temp1 != data1)
5810                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5811
5812                 /* read gfx register to wake up cgcg */
5813                 RREG32(mmCB_CGTT_SCLK_CTRL);
5814                 RREG32(mmCB_CGTT_SCLK_CTRL);
5815                 RREG32(mmCB_CGTT_SCLK_CTRL);
5816                 RREG32(mmCB_CGTT_SCLK_CTRL);
5817
5818                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5819                 gfx_v8_0_wait_for_rlc_serdes(adev);
5820
5821                 /* write cmd to Set CGCG Override */
5822                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5823
5824                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5825                 gfx_v8_0_wait_for_rlc_serdes(adev);
5826
5827                 /* write cmd to Clear CGLS */
5828                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5829
5830                 /* disable cgcg, cgls should be disabled too. */
5831                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5832                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5833                 if (temp != data)
5834                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5835                 /* enable interrupts again for PG */
5836                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5837         }
5838
             /* final SERDES idle wait, common to both paths, before leaving safe mode */
5839         gfx_v8_0_wait_for_rlc_serdes(adev);
5840
5841         amdgpu_gfx_rlc_exit_safe_mode(adev);
5842 }
5843 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5844                                             bool enable)
5845 {
5846         if (enable) {
5847                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5848                  * ===  MGCG + MGLS + TS(CG/LS) ===
5849                  */
5850                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5851                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852         } else {
5853                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5854                  * ===  CGCG + CGLS ===
5855                  */
5856                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5857                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5858         }
5859         return 0;
5860 }
5861
5862 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5863                                           enum amd_clockgating_state state)
5864 {
5865         uint32_t msg_id, pp_state = 0;
5866         uint32_t pp_support_state = 0;
5867
5868         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5870                         pp_support_state = PP_STATE_SUPPORT_LS;
5871                         pp_state = PP_STATE_LS;
5872                 }
5873                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5874                         pp_support_state |= PP_STATE_SUPPORT_CG;
5875                         pp_state |= PP_STATE_CG;
5876                 }
5877                 if (state == AMD_CG_STATE_UNGATE)
5878                         pp_state = 0;
5879
5880                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5881                                 PP_BLOCK_GFX_CG,
5882                                 pp_support_state,
5883                                 pp_state);
5884                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5885         }
5886
5887         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5888                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5889                         pp_support_state = PP_STATE_SUPPORT_LS;
5890                         pp_state = PP_STATE_LS;
5891                 }
5892
5893                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5894                         pp_support_state |= PP_STATE_SUPPORT_CG;
5895                         pp_state |= PP_STATE_CG;
5896                 }
5897
5898                 if (state == AMD_CG_STATE_UNGATE)
5899                         pp_state = 0;
5900
5901                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5902                                 PP_BLOCK_GFX_MG,
5903                                 pp_support_state,
5904                                 pp_state);
5905                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5906         }
5907
5908         return 0;
5909 }
5910
5911 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5912                                           enum amd_clockgating_state state)
5913 {
5914
5915         uint32_t msg_id, pp_state = 0;
5916         uint32_t pp_support_state = 0;
5917
5918         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920                         pp_support_state = PP_STATE_SUPPORT_LS;
5921                         pp_state = PP_STATE_LS;
5922                 }
5923                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924                         pp_support_state |= PP_STATE_SUPPORT_CG;
5925                         pp_state |= PP_STATE_CG;
5926                 }
5927                 if (state == AMD_CG_STATE_UNGATE)
5928                         pp_state = 0;
5929
5930                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5931                                 PP_BLOCK_GFX_CG,
5932                                 pp_support_state,
5933                                 pp_state);
5934                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5935         }
5936
5937         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5938                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5939                         pp_support_state = PP_STATE_SUPPORT_LS;
5940                         pp_state = PP_STATE_LS;
5941                 }
5942                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5943                         pp_support_state |= PP_STATE_SUPPORT_CG;
5944                         pp_state |= PP_STATE_CG;
5945                 }
5946                 if (state == AMD_CG_STATE_UNGATE)
5947                         pp_state = 0;
5948
5949                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5950                                 PP_BLOCK_GFX_3D,
5951                                 pp_support_state,
5952                                 pp_state);
5953                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5954         }
5955
5956         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5957                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5958                         pp_support_state = PP_STATE_SUPPORT_LS;
5959                         pp_state = PP_STATE_LS;
5960                 }
5961
5962                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5963                         pp_support_state |= PP_STATE_SUPPORT_CG;
5964                         pp_state |= PP_STATE_CG;
5965                 }
5966
5967                 if (state == AMD_CG_STATE_UNGATE)
5968                         pp_state = 0;
5969
5970                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971                                 PP_BLOCK_GFX_MG,
5972                                 pp_support_state,
5973                                 pp_state);
5974                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5975         }
5976
5977         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978                 pp_support_state = PP_STATE_SUPPORT_LS;
5979
5980                 if (state == AMD_CG_STATE_UNGATE)
5981                         pp_state = 0;
5982                 else
5983                         pp_state = PP_STATE_LS;
5984
5985                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986                                 PP_BLOCK_GFX_RLC,
5987                                 pp_support_state,
5988                                 pp_state);
5989                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5990         }
5991
5992         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5993                 pp_support_state = PP_STATE_SUPPORT_LS;
5994
5995                 if (state == AMD_CG_STATE_UNGATE)
5996                         pp_state = 0;
5997                 else
5998                         pp_state = PP_STATE_LS;
5999                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6000                         PP_BLOCK_GFX_CP,
6001                         pp_support_state,
6002                         pp_state);
6003                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6004         }
6005
6006         return 0;
6007 }
6008
6009 static int gfx_v8_0_set_clockgating_state(void *handle,
6010                                           enum amd_clockgating_state state)
6011 {
6012         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6013
6014         if (amdgpu_sriov_vf(adev))
6015                 return 0;
6016
6017         switch (adev->asic_type) {
6018         case CHIP_FIJI:
6019         case CHIP_CARRIZO:
6020         case CHIP_STONEY:
6021                 gfx_v8_0_update_gfx_clock_gating(adev,
6022                                                  state == AMD_CG_STATE_GATE);
6023                 break;
6024         case CHIP_TONGA:
6025                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6026                 break;
6027         case CHIP_POLARIS10:
6028         case CHIP_POLARIS11:
6029         case CHIP_POLARIS12:
6030         case CHIP_VEGAM:
6031                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6032                 break;
6033         default:
6034                 break;
6035         }
6036         return 0;
6037 }
6038
6039 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6040 {
6041         return *ring->rptr_cpu_addr;
6042 }
6043
6044 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6045 {
6046         struct amdgpu_device *adev = ring->adev;
6047
6048         if (ring->use_doorbell)
6049                 /* XXX check if swapping is necessary on BE */
6050                 return *ring->wptr_cpu_addr;
6051         else
6052                 return RREG32(mmCP_RB0_WPTR);
6053 }
6054
6055 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6056 {
6057         struct amdgpu_device *adev = ring->adev;
6058
6059         if (ring->use_doorbell) {
6060                 /* XXX check if swapping is necessary on BE */
6061                 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6062                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6063         } else {
6064                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6065                 (void)RREG32(mmCP_RB0_WPTR);
6066         }
6067 }
6068
6069 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6070 {
6071         u32 ref_and_mask, reg_mem_engine;
6072
6073         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6074             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6075                 switch (ring->me) {
6076                 case 1:
6077                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6078                         break;
6079                 case 2:
6080                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6081                         break;
6082                 default:
6083                         return;
6084                 }
6085                 reg_mem_engine = 0;
6086         } else {
6087                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6088                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6089         }
6090
6091         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6092         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6093                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6094                                  reg_mem_engine));
6095         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6096         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6097         amdgpu_ring_write(ring, ref_and_mask);
6098         amdgpu_ring_write(ring, ref_and_mask);
6099         amdgpu_ring_write(ring, 0x20); /* poll interval */
6100 }
6101
6102 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6103 {
6104         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6105         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6106                 EVENT_INDEX(4));
6107
6108         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6109         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6110                 EVENT_INDEX(0));
6111 }
6112
6113 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6114                                         struct amdgpu_job *job,
6115                                         struct amdgpu_ib *ib,
6116                                         uint32_t flags)
6117 {
6118         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6119         u32 header, control = 0;
6120
6121         if (ib->flags & AMDGPU_IB_FLAG_CE)
6122                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6123         else
6124                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6125
6126         control |= ib->length_dw | (vmid << 24);
6127
6128         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6129                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6130
6131                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6132                         gfx_v8_0_ring_emit_de_meta(ring);
6133         }
6134
6135         amdgpu_ring_write(ring, header);
6136         amdgpu_ring_write(ring,
6137 #ifdef __BIG_ENDIAN
6138                           (2 << 0) |
6139 #endif
6140                           (ib->gpu_addr & 0xFFFFFFFC));
6141         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6142         amdgpu_ring_write(ring, control);
6143 }
6144
6145 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6146                                           struct amdgpu_job *job,
6147                                           struct amdgpu_ib *ib,
6148                                           uint32_t flags)
6149 {
6150         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6151         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6152
6153         /* Currently, there is a high possibility to get wave ID mismatch
6154          * between ME and GDS, leading to a hw deadlock, because ME generates
6155          * different wave IDs than the GDS expects. This situation happens
6156          * randomly when at least 5 compute pipes use GDS ordered append.
6157          * The wave IDs generated by ME are also wrong after suspend/resume.
6158          * Those are probably bugs somewhere else in the kernel driver.
6159          *
6160          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6161          * GDS to 0 for this ring (me/pipe).
6162          */
6163         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6164                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6165                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6166                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6167         }
6168
6169         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6170         amdgpu_ring_write(ring,
6171 #ifdef __BIG_ENDIAN
6172                                 (2 << 0) |
6173 #endif
6174                                 (ib->gpu_addr & 0xFFFFFFFC));
6175         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6176         amdgpu_ring_write(ring, control);
6177 }
6178
6179 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6180                                          u64 seq, unsigned flags)
6181 {
6182         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6183         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6184
6185         /* Workaround for cache flush problems. First send a dummy EOP
6186          * event down the pipe with seq one below.
6187          */
6188         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6189         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6190                                  EOP_TC_ACTION_EN |
6191                                  EOP_TC_WB_ACTION_EN |
6192                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6193                                  EVENT_INDEX(5)));
6194         amdgpu_ring_write(ring, addr & 0xfffffffc);
6195         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6196                                 DATA_SEL(1) | INT_SEL(0));
6197         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6198         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6199
6200         /* Then send the real EOP event down the pipe:
6201          * EVENT_WRITE_EOP - flush caches, send int */
6202         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6203         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6204                                  EOP_TC_ACTION_EN |
6205                                  EOP_TC_WB_ACTION_EN |
6206                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6207                                  EVENT_INDEX(5)));
6208         amdgpu_ring_write(ring, addr & 0xfffffffc);
6209         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6210                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6211         amdgpu_ring_write(ring, lower_32_bits(seq));
6212         amdgpu_ring_write(ring, upper_32_bits(seq));
6213
6214 }
6215
6216 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6217 {
6218         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6219         uint32_t seq = ring->fence_drv.sync_seq;
6220         uint64_t addr = ring->fence_drv.gpu_addr;
6221
6222         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6223         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6224                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6225                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6226         amdgpu_ring_write(ring, addr & 0xfffffffc);
6227         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6228         amdgpu_ring_write(ring, seq);
6229         amdgpu_ring_write(ring, 0xffffffff);
6230         amdgpu_ring_write(ring, 4); /* poll interval */
6231 }
6232
6233 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6234                                         unsigned vmid, uint64_t pd_addr)
6235 {
6236         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6237
6238         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6239
6240         /* wait for the invalidate to complete */
6241         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6242         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6243                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6244                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6245         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6246         amdgpu_ring_write(ring, 0);
6247         amdgpu_ring_write(ring, 0); /* ref */
6248         amdgpu_ring_write(ring, 0); /* mask */
6249         amdgpu_ring_write(ring, 0x20); /* poll interval */
6250
6251         /* compute doesn't have PFP */
6252         if (usepfp) {
6253                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6254                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6255                 amdgpu_ring_write(ring, 0x0);
6256         }
6257 }
6258
6259 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6260 {
6261         return *ring->wptr_cpu_addr;
6262 }
6263
6264 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6265 {
6266         struct amdgpu_device *adev = ring->adev;
6267
6268         /* XXX check if swapping is necessary on BE */
6269         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6270         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6271 }
6272
6273 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6274                                              u64 addr, u64 seq,
6275                                              unsigned flags)
6276 {
6277         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6278         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6279
6280         /* RELEASE_MEM - flush caches, send int */
6281         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6282         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6283                                  EOP_TC_ACTION_EN |
6284                                  EOP_TC_WB_ACTION_EN |
6285                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6286                                  EVENT_INDEX(5)));
6287         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6288         amdgpu_ring_write(ring, addr & 0xfffffffc);
6289         amdgpu_ring_write(ring, upper_32_bits(addr));
6290         amdgpu_ring_write(ring, lower_32_bits(seq));
6291         amdgpu_ring_write(ring, upper_32_bits(seq));
6292 }
6293
6294 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6295                                          u64 seq, unsigned int flags)
6296 {
6297         /* we only allocate 32bit for each seq wb address */
6298         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6299
6300         /* write fence seq to the "addr" */
6301         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6302         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6303                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6304         amdgpu_ring_write(ring, lower_32_bits(addr));
6305         amdgpu_ring_write(ring, upper_32_bits(addr));
6306         amdgpu_ring_write(ring, lower_32_bits(seq));
6307
6308         if (flags & AMDGPU_FENCE_FLAG_INT) {
6309                 /* set register to trigger INT */
6310                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6311                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6312                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6313                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6314                 amdgpu_ring_write(ring, 0);
6315                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6316         }
6317 }
6318
6319 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6320 {
6321         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6322         amdgpu_ring_write(ring, 0);
6323 }
6324
6325 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6326 {
6327         uint32_t dw2 = 0;
6328
6329         if (amdgpu_sriov_vf(ring->adev))
6330                 gfx_v8_0_ring_emit_ce_meta(ring);
6331
6332         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6333         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6334                 gfx_v8_0_ring_emit_vgt_flush(ring);
6335                 /* set load_global_config & load_global_uconfig */
6336                 dw2 |= 0x8001;
6337                 /* set load_cs_sh_regs */
6338                 dw2 |= 0x01000000;
6339                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6340                 dw2 |= 0x10002;
6341
6342                 /* set load_ce_ram if preamble presented */
6343                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6344                         dw2 |= 0x10000000;
6345         } else {
6346                 /* still load_ce_ram if this is the first time preamble presented
6347                  * although there is no context switch happens.
6348                  */
6349                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6350                         dw2 |= 0x10000000;
6351         }
6352
6353         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6354         amdgpu_ring_write(ring, dw2);
6355         amdgpu_ring_write(ring, 0);
6356 }
6357
6358 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6359 {
6360         unsigned ret;
6361
6362         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6363         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6364         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6365         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6366         ret = ring->wptr & ring->buf_mask;
6367         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6368         return ret;
6369 }
6370
6371 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6372 {
6373         unsigned cur;
6374
6375         BUG_ON(offset > ring->buf_mask);
6376         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6377
6378         cur = (ring->wptr & ring->buf_mask) - 1;
6379         if (likely(cur > offset))
6380                 ring->ring[offset] = cur - offset;
6381         else
6382                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6383 }
6384
6385 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6386                                     uint32_t reg_val_offs)
6387 {
6388         struct amdgpu_device *adev = ring->adev;
6389
6390         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6391         amdgpu_ring_write(ring, 0 |     /* src: register*/
6392                                 (5 << 8) |      /* dst: memory */
6393                                 (1 << 20));     /* write confirm */
6394         amdgpu_ring_write(ring, reg);
6395         amdgpu_ring_write(ring, 0);
6396         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6397                                 reg_val_offs * 4));
6398         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6399                                 reg_val_offs * 4));
6400 }
6401
6402 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6403                                   uint32_t val)
6404 {
6405         uint32_t cmd;
6406
6407         switch (ring->funcs->type) {
6408         case AMDGPU_RING_TYPE_GFX:
6409                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6410                 break;
6411         case AMDGPU_RING_TYPE_KIQ:
6412                 cmd = 1 << 16; /* no inc addr */
6413                 break;
6414         default:
6415                 cmd = WR_CONFIRM;
6416                 break;
6417         }
6418
6419         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6420         amdgpu_ring_write(ring, cmd);
6421         amdgpu_ring_write(ring, reg);
6422         amdgpu_ring_write(ring, 0);
6423         amdgpu_ring_write(ring, val);
6424 }
6425
6426 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6427 {
6428         struct amdgpu_device *adev = ring->adev;
6429         uint32_t value = 0;
6430
6431         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6432         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6433         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6434         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6435         WREG32(mmSQ_CMD, value);
6436 }
6437
6438 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6439                                                  enum amdgpu_interrupt_state state)
6440 {
6441         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6442                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6443 }
6444
6445 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6446                                                      int me, int pipe,
6447                                                      enum amdgpu_interrupt_state state)
6448 {
6449         u32 mec_int_cntl, mec_int_cntl_reg;
6450
6451         /*
6452          * amdgpu controls only the first MEC. That's why this function only
6453          * handles the setting of interrupts for this specific MEC. All other
6454          * pipes' interrupts are set by amdkfd.
6455          */
6456
6457         if (me == 1) {
6458                 switch (pipe) {
6459                 case 0:
6460                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6461                         break;
6462                 case 1:
6463                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6464                         break;
6465                 case 2:
6466                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6467                         break;
6468                 case 3:
6469                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6470                         break;
6471                 default:
6472                         DRM_DEBUG("invalid pipe %d\n", pipe);
6473                         return;
6474                 }
6475         } else {
6476                 DRM_DEBUG("invalid me %d\n", me);
6477                 return;
6478         }
6479
6480         switch (state) {
6481         case AMDGPU_IRQ_STATE_DISABLE:
6482                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6483                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6484                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6485                 break;
6486         case AMDGPU_IRQ_STATE_ENABLE:
6487                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6488                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6489                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6490                 break;
6491         default:
6492                 break;
6493         }
6494 }
6495
6496 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6497                                              struct amdgpu_irq_src *source,
6498                                              unsigned type,
6499                                              enum amdgpu_interrupt_state state)
6500 {
6501         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6502                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6503
6504         return 0;
6505 }
6506
6507 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6508                                               struct amdgpu_irq_src *source,
6509                                               unsigned type,
6510                                               enum amdgpu_interrupt_state state)
6511 {
6512         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6513                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6514
6515         return 0;
6516 }
6517
6518 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6519                                             struct amdgpu_irq_src *src,
6520                                             unsigned type,
6521                                             enum amdgpu_interrupt_state state)
6522 {
6523         switch (type) {
6524         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6525                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6526                 break;
6527         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6528                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6529                 break;
6530         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6531                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6532                 break;
6533         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6534                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6535                 break;
6536         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6537                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6538                 break;
6539         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6540                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6541                 break;
6542         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6543                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6544                 break;
6545         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6546                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6547                 break;
6548         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6549                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6550                 break;
6551         default:
6552                 break;
6553         }
6554         return 0;
6555 }
6556
6557 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6558                                          struct amdgpu_irq_src *source,
6559                                          unsigned int type,
6560                                          enum amdgpu_interrupt_state state)
6561 {
6562         int enable_flag;
6563
6564         switch (state) {
6565         case AMDGPU_IRQ_STATE_DISABLE:
6566                 enable_flag = 0;
6567                 break;
6568
6569         case AMDGPU_IRQ_STATE_ENABLE:
6570                 enable_flag = 1;
6571                 break;
6572
6573         default:
6574                 return -EINVAL;
6575         }
6576
6577         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6580         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6581         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6582         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6583                      enable_flag);
6584         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6585                      enable_flag);
6586         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6587                      enable_flag);
6588         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6589                      enable_flag);
6590         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6591                      enable_flag);
6592         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6593                      enable_flag);
6594         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6595                      enable_flag);
6596         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6597                      enable_flag);
6598
6599         return 0;
6600 }
6601
6602 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6603                                      struct amdgpu_irq_src *source,
6604                                      unsigned int type,
6605                                      enum amdgpu_interrupt_state state)
6606 {
6607         int enable_flag;
6608
6609         switch (state) {
6610         case AMDGPU_IRQ_STATE_DISABLE:
6611                 enable_flag = 1;
6612                 break;
6613
6614         case AMDGPU_IRQ_STATE_ENABLE:
6615                 enable_flag = 0;
6616                 break;
6617
6618         default:
6619                 return -EINVAL;
6620         }
6621
6622         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6623                      enable_flag);
6624
6625         return 0;
6626 }
6627
6628 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6629                             struct amdgpu_irq_src *source,
6630                             struct amdgpu_iv_entry *entry)
6631 {
6632         int i;
6633         u8 me_id, pipe_id, queue_id;
6634         struct amdgpu_ring *ring;
6635
6636         DRM_DEBUG("IH: CP EOP\n");
6637         me_id = (entry->ring_id & 0x0c) >> 2;
6638         pipe_id = (entry->ring_id & 0x03) >> 0;
6639         queue_id = (entry->ring_id & 0x70) >> 4;
6640
6641         switch (me_id) {
6642         case 0:
6643                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6644                 break;
6645         case 1:
6646         case 2:
6647                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6648                         ring = &adev->gfx.compute_ring[i];
6649                         /* Per-queue interrupt is supported for MEC starting from VI.
6650                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6651                           */
6652                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6653                                 amdgpu_fence_process(ring);
6654                 }
6655                 break;
6656         }
6657         return 0;
6658 }
6659
6660 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6661                            struct amdgpu_iv_entry *entry)
6662 {
6663         u8 me_id, pipe_id, queue_id;
6664         struct amdgpu_ring *ring;
6665         int i;
6666
6667         me_id = (entry->ring_id & 0x0c) >> 2;
6668         pipe_id = (entry->ring_id & 0x03) >> 0;
6669         queue_id = (entry->ring_id & 0x70) >> 4;
6670
6671         switch (me_id) {
6672         case 0:
6673                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6674                 break;
6675         case 1:
6676         case 2:
6677                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6678                         ring = &adev->gfx.compute_ring[i];
6679                         if (ring->me == me_id && ring->pipe == pipe_id &&
6680                             ring->queue == queue_id)
6681                                 drm_sched_fault(&ring->sched);
6682                 }
6683                 break;
6684         }
6685 }
6686
/*
 * Privileged-register fault interrupt handler: log the illegal register
 * access and forward the entry to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	/* Let the scheduler of the offending ring know about the fault. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6695
/*
 * Privileged-instruction fault interrupt handler: log the illegal
 * instruction and forward the entry to gfx_v8_0_fault(). Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	/* Let the scheduler of the offending ring know about the fault. */
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6704
/*
 * CP EDC/ECC error interrupt handler: only logs the event.
 * Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Terminate the message with a newline so the log line is complete. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6712
6713 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6714                                   bool from_wq)
6715 {
6716         u32 enc, se_id, sh_id, cu_id;
6717         char type[20];
6718         int sq_edc_source = -1;
6719
6720         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6721         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6722
6723         switch (enc) {
6724                 case 0:
6725                         DRM_INFO("SQ general purpose intr detected:"
6726                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6727                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6728                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6729                                         "wlt %d, thread_trace %d.\n",
6730                                         se_id,
6731                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6732                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6733                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6734                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6735                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6736                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6737                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6738                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6739                                         );
6740                         break;
6741                 case 1:
6742                 case 2:
6743
6744                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6745                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6746
6747                         /*
6748                          * This function can be called either directly from ISR
6749                          * or from BH in which case we can access SQ_EDC_INFO
6750                          * instance
6751                          */
6752                         if (from_wq) {
6753                                 mutex_lock(&adev->grbm_idx_mutex);
6754                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6755
6756                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6757
6758                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6759                                 mutex_unlock(&adev->grbm_idx_mutex);
6760                         }
6761
6762                         if (enc == 1)
6763                                 sprintf(type, "instruction intr");
6764                         else
6765                                 sprintf(type, "EDC/ECC error");
6766
6767                         DRM_INFO(
6768                                 "SQ %s detected: "
6769                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6770                                         "trap %s, sq_ed_info.source %s.\n",
6771                                         type, se_id, sh_id, cu_id,
6772                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6773                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6774                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6775                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6776                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6777                                 );
6778                         break;
6779                 default:
6780                         DRM_ERROR("SQ invalid encoding type\n.");
6781         }
6782 }
6783
6784 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6785 {
6786
6787         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6788         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6789
6790         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6791 }
6792
6793 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6794                            struct amdgpu_irq_src *source,
6795                            struct amdgpu_iv_entry *entry)
6796 {
6797         unsigned ih_data = entry->src_data[0];
6798
6799         /*
6800          * Try to submit work so SQ_EDC_INFO can be accessed from
6801          * BH. If previous work submission hasn't finished yet
6802          * just print whatever info is possible directly from the ISR.
6803          */
6804         if (work_pending(&adev->gfx.sq_work.work)) {
6805                 gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6806         } else {
6807                 adev->gfx.sq_work.ih_data = ih_data;
6808                 schedule_work(&adev->gfx.sq_work.work);
6809         }
6810
6811         return 0;
6812 }
6813
6814 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6815 {
6816         amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6817         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6818                           PACKET3_TC_ACTION_ENA |
6819                           PACKET3_SH_KCACHE_ACTION_ENA |
6820                           PACKET3_SH_ICACHE_ACTION_ENA |
6821                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6822         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6823         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6824         amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6825 }
6826
6827 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6828 {
6829         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6830         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6831                           PACKET3_TC_ACTION_ENA |
6832                           PACKET3_SH_KCACHE_ACTION_ENA |
6833                           PACKET3_SH_ICACHE_ACTION_ENA |
6834                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6835         amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6836         amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6837         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6838         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6839         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6840 }
6841
6842
6843 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6844 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6845 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6846                                         uint32_t pipe, bool enable)
6847 {
6848         uint32_t val;
6849         uint32_t wcl_cs_reg;
6850
6851         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6852
6853         switch (pipe) {
6854         case 0:
6855                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6856                 break;
6857         case 1:
6858                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6859                 break;
6860         case 2:
6861                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6862                 break;
6863         case 3:
6864                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6865                 break;
6866         default:
6867                 DRM_DEBUG("invalid pipe %d\n", pipe);
6868                 return;
6869         }
6870
6871         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6872
6873 }
6874
6875 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6876 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6877 {
6878         struct amdgpu_device *adev = ring->adev;
6879         uint32_t val;
6880         int i;
6881
6882         /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6883          * number of gfx waves. Setting 5 bit will make sure gfx only gets
6884          * around 25% of gpu resources.
6885          */
6886         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6887         amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6888
6889         /* Restrict waves for normal/low priority compute queues as well
6890          * to get best QoS for high priority compute jobs.
6891          *
6892          * amdgpu controls only 1st ME(0-3 CS pipes).
6893          */
6894         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6895                 if (i != ring->pipe)
6896                         gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6897
6898         }
6899
6900 }
6901
6902 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6903         .name = "gfx_v8_0",
6904         .early_init = gfx_v8_0_early_init,
6905         .late_init = gfx_v8_0_late_init,
6906         .sw_init = gfx_v8_0_sw_init,
6907         .sw_fini = gfx_v8_0_sw_fini,
6908         .hw_init = gfx_v8_0_hw_init,
6909         .hw_fini = gfx_v8_0_hw_fini,
6910         .suspend = gfx_v8_0_suspend,
6911         .resume = gfx_v8_0_resume,
6912         .is_idle = gfx_v8_0_is_idle,
6913         .wait_for_idle = gfx_v8_0_wait_for_idle,
6914         .check_soft_reset = gfx_v8_0_check_soft_reset,
6915         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6916         .soft_reset = gfx_v8_0_soft_reset,
6917         .post_soft_reset = gfx_v8_0_post_soft_reset,
6918         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6919         .set_powergating_state = gfx_v8_0_set_powergating_state,
6920         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6921 };
6922
6923 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6924         .type = AMDGPU_RING_TYPE_GFX,
6925         .align_mask = 0xff,
6926         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6927         .support_64bit_ptrs = false,
6928         .get_rptr = gfx_v8_0_ring_get_rptr,
6929         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6930         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6931         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6932                 5 +  /* COND_EXEC */
6933                 7 +  /* PIPELINE_SYNC */
6934                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6935                 12 +  /* FENCE for VM_FLUSH */
6936                 20 + /* GDS switch */
6937                 4 + /* double SWITCH_BUFFER,
6938                        the first COND_EXEC jump to the place just
6939                            prior to this double SWITCH_BUFFER  */
6940                 5 + /* COND_EXEC */
6941                 7 +      /*     HDP_flush */
6942                 4 +      /*     VGT_flush */
6943                 14 + /* CE_META */
6944                 31 + /* DE_META */
6945                 3 + /* CNTX_CTRL */
6946                 5 + /* HDP_INVL */
6947                 12 + 12 + /* FENCE x2 */
6948                 2 + /* SWITCH_BUFFER */
6949                 5, /* SURFACE_SYNC */
6950         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6951         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6952         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6953         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6954         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6955         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6956         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6957         .test_ring = gfx_v8_0_ring_test_ring,
6958         .test_ib = gfx_v8_0_ring_test_ib,
6959         .insert_nop = amdgpu_ring_insert_nop,
6960         .pad_ib = amdgpu_ring_generic_pad_ib,
6961         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6962         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6963         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6964         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6965         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6966         .soft_recovery = gfx_v8_0_ring_soft_recovery,
6967         .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6968 };
6969
6970 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6971         .type = AMDGPU_RING_TYPE_COMPUTE,
6972         .align_mask = 0xff,
6973         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6974         .support_64bit_ptrs = false,
6975         .get_rptr = gfx_v8_0_ring_get_rptr,
6976         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6977         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6978         .emit_frame_size =
6979                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6980                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6981                 5 + /* hdp_invalidate */
6982                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6983                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6984                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6985                 7 + /* gfx_v8_0_emit_mem_sync_compute */
6986                 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6987                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6988         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6989         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6990         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6991         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6992         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6993         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6994         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6995         .test_ring = gfx_v8_0_ring_test_ring,
6996         .test_ib = gfx_v8_0_ring_test_ib,
6997         .insert_nop = amdgpu_ring_insert_nop,
6998         .pad_ib = amdgpu_ring_generic_pad_ib,
6999         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7000         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7001         .emit_wave_limit = gfx_v8_0_emit_wave_limit,
7002 };
7003
7004 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7005         .type = AMDGPU_RING_TYPE_KIQ,
7006         .align_mask = 0xff,
7007         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7008         .support_64bit_ptrs = false,
7009         .get_rptr = gfx_v8_0_ring_get_rptr,
7010         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7011         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7012         .emit_frame_size =
7013                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7014                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7015                 5 + /* hdp_invalidate */
7016                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7017                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7018                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7019         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7020         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7021         .test_ring = gfx_v8_0_ring_test_ring,
7022         .insert_nop = amdgpu_ring_insert_nop,
7023         .pad_ib = amdgpu_ring_generic_pad_ib,
7024         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7025         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7026 };
7027
7028 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7029 {
7030         int i;
7031
7032         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7033
7034         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7035                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7036
7037         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7038                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7039 }
7040
7041 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7042         .set = gfx_v8_0_set_eop_interrupt_state,
7043         .process = gfx_v8_0_eop_irq,
7044 };
7045
7046 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7047         .set = gfx_v8_0_set_priv_reg_fault_state,
7048         .process = gfx_v8_0_priv_reg_irq,
7049 };
7050
7051 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7052         .set = gfx_v8_0_set_priv_inst_fault_state,
7053         .process = gfx_v8_0_priv_inst_irq,
7054 };
7055
7056 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7057         .set = gfx_v8_0_set_cp_ecc_int_state,
7058         .process = gfx_v8_0_cp_ecc_error_irq,
7059 };
7060
7061 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7062         .set = gfx_v8_0_set_sq_int_state,
7063         .process = gfx_v8_0_sq_irq,
7064 };
7065
7066 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7067 {
7068         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7069         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7070
7071         adev->gfx.priv_reg_irq.num_types = 1;
7072         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7073
7074         adev->gfx.priv_inst_irq.num_types = 1;
7075         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7076
7077         adev->gfx.cp_ecc_error_irq.num_types = 1;
7078         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7079
7080         adev->gfx.sq_irq.num_types = 1;
7081         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7082 }
7083
7084 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7085 {
7086         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7087 }
7088
7089 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7090 {
7091         /* init asci gds info */
7092         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7093         adev->gds.gws_size = 64;
7094         adev->gds.oa_size = 16;
7095         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7096 }
7097
7098 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7099                                                  u32 bitmap)
7100 {
7101         u32 data;
7102
7103         if (!bitmap)
7104                 return;
7105
7106         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7107         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7108
7109         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7110 }
7111
7112 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7113 {
7114         u32 data, mask;
7115
7116         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7117                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7118
7119         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7120
7121         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7122 }
7123
7124 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7125 {
7126         int i, j, k, counter, active_cu_number = 0;
7127         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7128         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7129         unsigned disable_masks[4 * 2];
7130         u32 ao_cu_num;
7131
7132         memset(cu_info, 0, sizeof(*cu_info));
7133
7134         if (adev->flags & AMD_IS_APU)
7135                 ao_cu_num = 2;
7136         else
7137                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7138
7139         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7140
7141         mutex_lock(&adev->grbm_idx_mutex);
7142         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7143                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7144                         mask = 1;
7145                         ao_bitmap = 0;
7146                         counter = 0;
7147                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7148                         if (i < 4 && j < 2)
7149                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7150                                         adev, disable_masks[i * 2 + j]);
7151                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7152                         cu_info->bitmap[i][j] = bitmap;
7153
7154                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7155                                 if (bitmap & mask) {
7156                                         if (counter < ao_cu_num)
7157                                                 ao_bitmap |= mask;
7158                                         counter ++;
7159                                 }
7160                                 mask <<= 1;
7161                         }
7162                         active_cu_number += counter;
7163                         if (i < 2 && j < 2)
7164                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7165                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7166                 }
7167         }
7168         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7169         mutex_unlock(&adev->grbm_idx_mutex);
7170
7171         cu_info->number = active_cu_number;
7172         cu_info->ao_cu_mask = ao_cu_mask;
7173         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7174         cu_info->max_waves_per_simd = 10;
7175         cu_info->max_scratch_slots_per_cu = 32;
7176         cu_info->wave_front_size = 64;
7177         cu_info->lds_size = 64;
7178 }
7179
7180 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7181 {
7182         .type = AMD_IP_BLOCK_TYPE_GFX,
7183         .major = 8,
7184         .minor = 0,
7185         .rev = 0,
7186         .funcs = &gfx_v8_0_ip_funcs,
7187 };
7188
7189 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7190 {
7191         .type = AMD_IP_BLOCK_TYPE_GFX,
7192         .major = 8,
7193         .minor = 1,
7194         .rev = 0,
7195         .funcs = &gfx_v8_0_ip_funcs,
7196 };
7197
7198 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7199 {
7200         uint64_t ce_payload_addr;
7201         int cnt_ce;
7202         union {
7203                 struct vi_ce_ib_state regular;
7204                 struct vi_ce_ib_state_chained_ib chained;
7205         } ce_payload = {};
7206
7207         if (ring->adev->virt.chained_ib_support) {
7208                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7209                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7210                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7211         } else {
7212                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7213                         offsetof(struct vi_gfx_meta_data, ce_payload);
7214                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7215         }
7216
7217         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7218         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7219                                 WRITE_DATA_DST_SEL(8) |
7220                                 WR_CONFIRM) |
7221                                 WRITE_DATA_CACHE_POLICY(0));
7222         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7223         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7224         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7225 }
7226
7227 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7228 {
7229         uint64_t de_payload_addr, gds_addr, csa_addr;
7230         int cnt_de;
7231         union {
7232                 struct vi_de_ib_state regular;
7233                 struct vi_de_ib_state_chained_ib chained;
7234         } de_payload = {};
7235
7236         csa_addr = amdgpu_csa_vaddr(ring->adev);
7237         gds_addr = csa_addr + 4096;
7238         if (ring->adev->virt.chained_ib_support) {
7239                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7240                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7241                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7242                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7243         } else {
7244                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7245                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7246                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7247                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7248         }
7249
7250         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7251         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7252                                 WRITE_DATA_DST_SEL(8) |
7253                                 WR_CONFIRM) |
7254                                 WRITE_DATA_CACHE_POLICY(0));
7255         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7256         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7257         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7258 }
This page took 0.47221 seconds and 4 git commands to generate.