Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'printk-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/printk...
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "amdgpu_ring.h"
33 #include "vi.h"
34 #include "vi_structs.h"
35 #include "vid.h"
36 #include "amdgpu_ucode.h"
37 #include "amdgpu_atombios.h"
38 #include "atombios_i2c.h"
39 #include "clearstate_vi.h"
40
41 #include "gmc/gmc_8_2_d.h"
42 #include "gmc/gmc_8_2_sh_mask.h"
43
44 #include "oss/oss_3_0_d.h"
45 #include "oss/oss_3_0_sh_mask.h"
46
47 #include "bif/bif_5_0_d.h"
48 #include "bif/bif_5_0_sh_mask.h"
49 #include "gca/gfx_8_0_d.h"
50 #include "gca/gfx_8_0_enum.h"
51 #include "gca/gfx_8_0_sh_mask.h"
52
53 #include "dce/dce_10_0_d.h"
54 #include "dce/dce_10_0_sh_mask.h"
55
56 #include "smu/smu_7_1_3_d.h"
57
58 #include "ivsrcid/ivsrcid_vislands30.h"
59
60 #define GFX8_NUM_GFX_RINGS     1
61 #define GFX8_MEC_HPD_SIZE 4096
62
63 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
64 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
65 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
66 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
67
68 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
69 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
70 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
71 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
72 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
73 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
74 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
75 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
76 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
77
78 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
79 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
80 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
81 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
82 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
83 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
84
85 /* BPM SERDES CMD */
86 #define SET_BPM_SERDES_CMD    1
87 #define CLE_BPM_SERDES_CMD    0
88
89 /* BPM Register Address*/
90 enum {
91         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
92         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
93         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
94         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
95         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
96         BPM_REG_FGCG_MAX
97 };
98
99 #define RLC_FormatDirectRegListLength        14
100
101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
145
146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
157
158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
169
170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
176
177 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
178 {
179         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
180         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
181         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
182         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
183         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
184         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
185         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
186         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
187         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
188         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
189         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
190         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
191         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
192         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
193         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
194         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
195 };
196
197 static const u32 golden_settings_tonga_a11[] =
198 {
199         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
200         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
201         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
202         mmGB_GPU_ID, 0x0000000f, 0x00000000,
203         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
204         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
205         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
206         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
207         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
208         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
209         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
210         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
211         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
212         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
213         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
214         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
215 };
216
217 static const u32 tonga_golden_common_all[] =
218 {
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
221         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
222         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
223         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
224         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
225         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
226         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
227 };
228
229 static const u32 tonga_mgcg_cgcg_init[] =
230 {
231         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
234         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
236         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
238         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
240         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
242         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
245         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
246         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
247         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
249         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
250         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
251         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
252         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
253         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
254         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
255         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
256         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
257         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
258         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
259         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
260         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
261         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
262         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
263         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
264         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
265         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
266         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
267         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
268         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
269         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
270         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
271         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
272         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
273         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
274         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
275         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
276         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
277         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
278         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
279         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
280         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
281         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
282         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
283         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
284         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
285         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
286         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
287         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
288         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
289         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
290         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
291         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
292         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
293         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
294         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
295         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
296         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
297         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
298         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
299         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
300         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
301         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
302         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
303         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
304         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
305         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
306 };
307
308 static const u32 golden_settings_vegam_a11[] =
309 {
310         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
311         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
312         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
313         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
314         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
315         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
316         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
317         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
318         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
319         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
320         mmSQ_CONFIG, 0x07f80000, 0x01180000,
321         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
322         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
323         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
324         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
325         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
326         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
327 };
328
329 static const u32 vegam_golden_common_all[] =
330 {
331         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
332         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
333         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
336         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
337 };
338
339 static const u32 golden_settings_polaris11_a11[] =
340 {
341         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
342         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
343         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
344         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
345         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
346         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
347         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
348         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
349         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
350         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
351         mmSQ_CONFIG, 0x07f80000, 0x01180000,
352         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
353         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
354         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
355         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
356         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
357         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
358 };
359
360 static const u32 polaris11_golden_common_all[] =
361 {
362         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
363         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
364         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
365         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
366         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
367         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
368 };
369
370 static const u32 golden_settings_polaris10_a11[] =
371 {
372         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
373         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
374         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
375         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
380         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
381         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
382         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
383         mmSQ_CONFIG, 0x07f80000, 0x07180000,
384         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
385         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
386         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
387         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
388         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
389 };
390
391 static const u32 polaris10_golden_common_all[] =
392 {
393         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
395         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
396         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
397         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
398         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
399         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
400         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
401 };
402
403 static const u32 fiji_golden_common_all[] =
404 {
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
407         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
408         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
409         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
410         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
411         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
412         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
415 };
416
417 static const u32 golden_settings_fiji_a10[] =
418 {
419         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
422         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
423         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
426         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
427         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
428         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
429         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
430 };
431
432 static const u32 fiji_mgcg_cgcg_init[] =
433 {
434         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
435         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
436         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
441         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
443         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
445         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
456         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
459         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
460         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
461         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
462         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
465         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
466         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
467         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
468         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
469 };
470
471 static const u32 golden_settings_iceland_a11[] =
472 {
473         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
474         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
475         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
476         mmGB_GPU_ID, 0x0000000f, 0x00000000,
477         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
478         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
479         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
480         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
481         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
482         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
483         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
484         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
485         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
486         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
487         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
488         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
489 };
490
491 static const u32 iceland_golden_common_all[] =
492 {
493         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
494         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
495         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
496         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
497         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
498         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
499         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
500         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
501 };
502
503 static const u32 iceland_mgcg_cgcg_init[] =
504 {
505         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
506         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
507         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
508         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
510         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
511         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
512         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
516         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
519         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
520         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
521         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
523         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
524         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
525         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
526         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
527         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
528         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
531         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
532         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
533         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
534         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
535         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
536         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
537         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
538         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
539         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
540         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
541         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
542         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
543         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
544         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
545         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
546         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
547         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
548         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
549         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
550         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
551         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
552         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
553         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
554         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
555         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
556         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
557         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
558         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
559         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
560         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
561         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
562         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
563         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
564         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
565         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
566         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
567         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
568         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
569 };
570
571 static const u32 cz_golden_settings_a11[] =
572 {
573         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
574         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
575         mmGB_GPU_ID, 0x0000000f, 0x00000000,
576         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
577         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
578         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
579         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
580         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
581         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
582         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
583         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
584         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
585 };
586
587 static const u32 cz_golden_common_all[] =
588 {
589         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
590         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
591         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
592         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
593         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
594         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
595         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
596         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
597 };
598
599 static const u32 cz_mgcg_cgcg_init[] =
600 {
601         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
602         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
603         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
606         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
610         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
612         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
615         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
616         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
617         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
619         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
620         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
621         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
622         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
623         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
624         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
625         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
626         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
627         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
628         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
629         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
630         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
631         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
632         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
633         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
634         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
635         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
636         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
637         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
638         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
639         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
640         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
641         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
642         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
643         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
644         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
645         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
646         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
647         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
648         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
649         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
650         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
651         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
652         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
653         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
654         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
655         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
656         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
657         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
658         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
659         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
660         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
661         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
662         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
663         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
664         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
665         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
666         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
667         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
668         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
669         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
670         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
671         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
672         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
673         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
674         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
675         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
676 };
677
/*
 * Golden settings for Stoney (A11).  Passed, together with its ARRAY_SIZE(),
 * to amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() for CHIP_STONEY.
 * NOTE(review): rows look like {register, mask, value} triplets - confirm
 * against amdgpu_device_program_register_sequence().
 */
678 static const u32 stoney_golden_settings_a11[] =
679 {
680         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
681         mmGB_GPU_ID, 0x0000000f, 0x00000000,
682         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
683         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
684         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
685         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
686         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
687         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
688         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
689         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
690 };
691
/*
 * Common golden register values for Stoney.  This is the last of the three
 * tables gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 * NOTE(review): rows look like {register, mask, value} triplets - confirm
 * against amdgpu_device_program_register_sequence().
 */
692 static const u32 stoney_golden_common_all[] =
693 {
694         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
695         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
696         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
697         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
698         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
699         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
700         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
701         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
702 };
703
/*
 * Clock-gating related register values for Stoney.  This is the first table
 * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 * NOTE(review): "mgcg"/"cgcg" presumably stand for medium-/coarse-grain clock
 * gating, and rows look like {register, mask, value} triplets - confirm.
 */
704 static const u32 stoney_mgcg_cgcg_init[] =
705 {
706         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
707         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
708         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
709         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
710         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
711 };
712
713
/*
 * Human-readable descriptions of the SQ EDC error sources, one string per
 * source (seven entries).
 * NOTE(review): presumably indexed by the SOURCE field of the SQ_EDC_INFO
 * register - confirm at the use site, which is outside this part of the file.
 */
714 static const char * const sq_edc_source_names[] = {
715         "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
716         "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
717         "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
718         "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
719         "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
720         "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
721         "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
722 };
723
/* Forward declarations of file-local (static) gfx v8 helpers. */
724 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
725 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
726 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
727 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
728 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
729 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
730 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
731 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
732
/*
 * Mask and shift of the ACLK_DIVIDER field in the SMC register CG_ACLK_CNTL.
 * Used by gfx_v8_0_init_golden_registers() to set the divider on Polaris10.
 */
733 #define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
734 #define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
735
736 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
737 {
738         uint32_t data;
739
740         switch (adev->asic_type) {
741         case CHIP_TOPAZ:
742                 amdgpu_device_program_register_sequence(adev,
743                                                         iceland_mgcg_cgcg_init,
744                                                         ARRAY_SIZE(iceland_mgcg_cgcg_init));
745                 amdgpu_device_program_register_sequence(adev,
746                                                         golden_settings_iceland_a11,
747                                                         ARRAY_SIZE(golden_settings_iceland_a11));
748                 amdgpu_device_program_register_sequence(adev,
749                                                         iceland_golden_common_all,
750                                                         ARRAY_SIZE(iceland_golden_common_all));
751                 break;
752         case CHIP_FIJI:
753                 amdgpu_device_program_register_sequence(adev,
754                                                         fiji_mgcg_cgcg_init,
755                                                         ARRAY_SIZE(fiji_mgcg_cgcg_init));
756                 amdgpu_device_program_register_sequence(adev,
757                                                         golden_settings_fiji_a10,
758                                                         ARRAY_SIZE(golden_settings_fiji_a10));
759                 amdgpu_device_program_register_sequence(adev,
760                                                         fiji_golden_common_all,
761                                                         ARRAY_SIZE(fiji_golden_common_all));
762                 break;
763
764         case CHIP_TONGA:
765                 amdgpu_device_program_register_sequence(adev,
766                                                         tonga_mgcg_cgcg_init,
767                                                         ARRAY_SIZE(tonga_mgcg_cgcg_init));
768                 amdgpu_device_program_register_sequence(adev,
769                                                         golden_settings_tonga_a11,
770                                                         ARRAY_SIZE(golden_settings_tonga_a11));
771                 amdgpu_device_program_register_sequence(adev,
772                                                         tonga_golden_common_all,
773                                                         ARRAY_SIZE(tonga_golden_common_all));
774                 break;
775         case CHIP_VEGAM:
776                 amdgpu_device_program_register_sequence(adev,
777                                                         golden_settings_vegam_a11,
778                                                         ARRAY_SIZE(golden_settings_vegam_a11));
779                 amdgpu_device_program_register_sequence(adev,
780                                                         vegam_golden_common_all,
781                                                         ARRAY_SIZE(vegam_golden_common_all));
782                 break;
783         case CHIP_POLARIS11:
784         case CHIP_POLARIS12:
785                 amdgpu_device_program_register_sequence(adev,
786                                                         golden_settings_polaris11_a11,
787                                                         ARRAY_SIZE(golden_settings_polaris11_a11));
788                 amdgpu_device_program_register_sequence(adev,
789                                                         polaris11_golden_common_all,
790                                                         ARRAY_SIZE(polaris11_golden_common_all));
791                 break;
792         case CHIP_POLARIS10:
793                 amdgpu_device_program_register_sequence(adev,
794                                                         golden_settings_polaris10_a11,
795                                                         ARRAY_SIZE(golden_settings_polaris10_a11));
796                 amdgpu_device_program_register_sequence(adev,
797                                                         polaris10_golden_common_all,
798                                                         ARRAY_SIZE(polaris10_golden_common_all));
799                 data = RREG32_SMC(ixCG_ACLK_CNTL);
800                 data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
801                 data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
802                 WREG32_SMC(ixCG_ACLK_CNTL, data);
803                 if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
804                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
805                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
806                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
807                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
808                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
809                 }
810                 break;
811         case CHIP_CARRIZO:
812                 amdgpu_device_program_register_sequence(adev,
813                                                         cz_mgcg_cgcg_init,
814                                                         ARRAY_SIZE(cz_mgcg_cgcg_init));
815                 amdgpu_device_program_register_sequence(adev,
816                                                         cz_golden_settings_a11,
817                                                         ARRAY_SIZE(cz_golden_settings_a11));
818                 amdgpu_device_program_register_sequence(adev,
819                                                         cz_golden_common_all,
820                                                         ARRAY_SIZE(cz_golden_common_all));
821                 break;
822         case CHIP_STONEY:
823                 amdgpu_device_program_register_sequence(adev,
824                                                         stoney_mgcg_cgcg_init,
825                                                         ARRAY_SIZE(stoney_mgcg_cgcg_init));
826                 amdgpu_device_program_register_sequence(adev,
827                                                         stoney_golden_settings_a11,
828                                                         ARRAY_SIZE(stoney_golden_settings_a11));
829                 amdgpu_device_program_register_sequence(adev,
830                                                         stoney_golden_common_all,
831                                                         ARRAY_SIZE(stoney_golden_common_all));
832                 break;
833         default:
834                 break;
835         }
836 }
837
838 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 {
840         struct amdgpu_device *adev = ring->adev;
841         uint32_t tmp = 0;
842         unsigned i;
843         int r;
844
845         WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
846         r = amdgpu_ring_alloc(ring, 3);
847         if (r)
848                 return r;
849
850         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851         amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
852         amdgpu_ring_write(ring, 0xDEADBEEF);
853         amdgpu_ring_commit(ring);
854
855         for (i = 0; i < adev->usec_timeout; i++) {
856                 tmp = RREG32(mmSCRATCH_REG0);
857                 if (tmp == 0xDEADBEEF)
858                         break;
859                 udelay(1);
860         }
861
862         if (i >= adev->usec_timeout)
863                 r = -ETIMEDOUT;
864
865         return r;
866 }
867
868 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
869 {
870         struct amdgpu_device *adev = ring->adev;
871         struct amdgpu_ib ib;
872         struct dma_fence *f = NULL;
873
874         unsigned int index;
875         uint64_t gpu_addr;
876         uint32_t tmp;
877         long r;
878
879         r = amdgpu_device_wb_get(adev, &index);
880         if (r)
881                 return r;
882
883         gpu_addr = adev->wb.gpu_addr + (index * 4);
884         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
885         memset(&ib, 0, sizeof(ib));
886
887         r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
888         if (r)
889                 goto err1;
890
891         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
892         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
893         ib.ptr[2] = lower_32_bits(gpu_addr);
894         ib.ptr[3] = upper_32_bits(gpu_addr);
895         ib.ptr[4] = 0xDEADBEEF;
896         ib.length_dw = 5;
897
898         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
899         if (r)
900                 goto err2;
901
902         r = dma_fence_wait_timeout(f, false, timeout);
903         if (r == 0) {
904                 r = -ETIMEDOUT;
905                 goto err2;
906         } else if (r < 0) {
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF)
912                 r = 0;
913         else
914                 r = -EINVAL;
915
916 err2:
917         amdgpu_ib_free(adev, &ib, NULL);
918         dma_fence_put(f);
919 err1:
920         amdgpu_device_wb_free(adev, index);
921         return r;
922 }
923
924
925 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
926 {
927         amdgpu_ucode_release(&adev->gfx.pfp_fw);
928         amdgpu_ucode_release(&adev->gfx.me_fw);
929         amdgpu_ucode_release(&adev->gfx.ce_fw);
930         amdgpu_ucode_release(&adev->gfx.rlc_fw);
931         amdgpu_ucode_release(&adev->gfx.mec_fw);
932         if ((adev->asic_type != CHIP_STONEY) &&
933             (adev->asic_type != CHIP_TOPAZ))
934                 amdgpu_ucode_release(&adev->gfx.mec2_fw);
935
936         kfree(adev->gfx.rlc.register_list_format);
937 }
938
939 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
940 {
941         const char *chip_name;
942         int err;
943         struct amdgpu_firmware_info *info = NULL;
944         const struct common_firmware_header *header = NULL;
945         const struct gfx_firmware_header_v1_0 *cp_hdr;
946         const struct rlc_firmware_header_v2_0 *rlc_hdr;
947         unsigned int *tmp = NULL, i;
948
949         DRM_DEBUG("\n");
950
951         switch (adev->asic_type) {
952         case CHIP_TOPAZ:
953                 chip_name = "topaz";
954                 break;
955         case CHIP_TONGA:
956                 chip_name = "tonga";
957                 break;
958         case CHIP_CARRIZO:
959                 chip_name = "carrizo";
960                 break;
961         case CHIP_FIJI:
962                 chip_name = "fiji";
963                 break;
964         case CHIP_STONEY:
965                 chip_name = "stoney";
966                 break;
967         case CHIP_POLARIS10:
968                 chip_name = "polaris10";
969                 break;
970         case CHIP_POLARIS11:
971                 chip_name = "polaris11";
972                 break;
973         case CHIP_POLARIS12:
974                 chip_name = "polaris12";
975                 break;
976         case CHIP_VEGAM:
977                 chip_name = "vegam";
978                 break;
979         default:
980                 BUG();
981         }
982
983         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
984                 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
985                                            "amdgpu/%s_pfp_2.bin", chip_name);
986                 if (err == -ENODEV) {
987                         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
988                                                    "amdgpu/%s_pfp.bin", chip_name);
989                 }
990         } else {
991                 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
992                                            "amdgpu/%s_pfp.bin", chip_name);
993         }
994         if (err)
995                 goto out;
996         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
997         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
998         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
999
1000         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1001                 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1002                                            "amdgpu/%s_me_2.bin", chip_name);
1003                 if (err == -ENODEV) {
1004                         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1005                                                    "amdgpu/%s_me.bin", chip_name);
1006                 }
1007         } else {
1008                 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1009                                            "amdgpu/%s_me.bin", chip_name);
1010         }
1011         if (err)
1012                 goto out;
1013         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1014         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015
1016         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1017
1018         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1019                 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1020                                            "amdgpu/%s_ce_2.bin", chip_name);
1021                 if (err == -ENODEV) {
1022                         err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1023                                                    "amdgpu/%s_ce.bin", chip_name);
1024                 }
1025         } else {
1026                 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1027                                            "amdgpu/%s_ce.bin", chip_name);
1028         }
1029         if (err)
1030                 goto out;
1031         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1032         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034
1035         /*
1036          * Support for MCBP/Virtualization in combination with chained IBs is
1037          * formal released on feature version #46
1038          */
1039         if (adev->gfx.ce_feature_version >= 46 &&
1040             adev->gfx.pfp_feature_version >= 46) {
1041                 adev->virt.chained_ib_support = true;
1042                 DRM_INFO("Chained IB support enabled!\n");
1043         } else
1044                 adev->virt.chained_ib_support = false;
1045
1046         err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1047                                    "amdgpu/%s_rlc.bin", chip_name);
1048         if (err)
1049                 goto out;
1050         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1051         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1052         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1053
1054         adev->gfx.rlc.save_and_restore_offset =
1055                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1056         adev->gfx.rlc.clear_state_descriptor_offset =
1057                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1058         adev->gfx.rlc.avail_scratch_ram_locations =
1059                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1060         adev->gfx.rlc.reg_restore_list_size =
1061                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1062         adev->gfx.rlc.reg_list_format_start =
1063                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1064         adev->gfx.rlc.reg_list_format_separate_start =
1065                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1066         adev->gfx.rlc.starting_offsets_start =
1067                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1068         adev->gfx.rlc.reg_list_format_size_bytes =
1069                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1070         adev->gfx.rlc.reg_list_size_bytes =
1071                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1072
1073         adev->gfx.rlc.register_list_format =
1074                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1075                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1076
1077         if (!adev->gfx.rlc.register_list_format) {
1078                 err = -ENOMEM;
1079                 goto out;
1080         }
1081
1082         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1083                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1084         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1085                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1086
1087         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1088
1089         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1090                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1091         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1092                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1093
1094         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1095                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1096                                            "amdgpu/%s_mec_2.bin", chip_name);
1097                 if (err == -ENODEV) {
1098                         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1099                                                    "amdgpu/%s_mec.bin", chip_name);
1100                 }
1101         } else {
1102                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1103                                            "amdgpu/%s_mec.bin", chip_name);
1104         }
1105         if (err)
1106                 goto out;
1107         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1108         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1109         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1110
1111         if ((adev->asic_type != CHIP_STONEY) &&
1112             (adev->asic_type != CHIP_TOPAZ)) {
1113                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1114                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1115                                                    "amdgpu/%s_mec2_2.bin", chip_name);
1116                         if (err == -ENODEV) {
1117                                 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1118                                                            "amdgpu/%s_mec2.bin", chip_name);
1119                         }
1120                 } else {
1121                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1122                                                    "amdgpu/%s_mec2.bin", chip_name);
1123                 }
1124                 if (!err) {
1125                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1126                                 adev->gfx.mec2_fw->data;
1127                         adev->gfx.mec2_fw_version =
1128                                 le32_to_cpu(cp_hdr->header.ucode_version);
1129                         adev->gfx.mec2_feature_version =
1130                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1131                 } else {
1132                         err = 0;
1133                         adev->gfx.mec2_fw = NULL;
1134                 }
1135         }
1136
1137         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1138         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1139         info->fw = adev->gfx.pfp_fw;
1140         header = (const struct common_firmware_header *)info->fw->data;
1141         adev->firmware.fw_size +=
1142                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1143
1144         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1145         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1146         info->fw = adev->gfx.me_fw;
1147         header = (const struct common_firmware_header *)info->fw->data;
1148         adev->firmware.fw_size +=
1149                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1150
1151         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1152         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1153         info->fw = adev->gfx.ce_fw;
1154         header = (const struct common_firmware_header *)info->fw->data;
1155         adev->firmware.fw_size +=
1156                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1157
1158         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1159         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1160         info->fw = adev->gfx.rlc_fw;
1161         header = (const struct common_firmware_header *)info->fw->data;
1162         adev->firmware.fw_size +=
1163                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1164
1165         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1166         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1167         info->fw = adev->gfx.mec_fw;
1168         header = (const struct common_firmware_header *)info->fw->data;
1169         adev->firmware.fw_size +=
1170                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172         /* we need account JT in */
1173         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1174         adev->firmware.fw_size +=
1175                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1176
1177         if (amdgpu_sriov_vf(adev)) {
1178                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1179                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1180                 info->fw = adev->gfx.mec_fw;
1181                 adev->firmware.fw_size +=
1182                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1183         }
1184
1185         if (adev->gfx.mec2_fw) {
1186                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1187                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1188                 info->fw = adev->gfx.mec2_fw;
1189                 header = (const struct common_firmware_header *)info->fw->data;
1190                 adev->firmware.fw_size +=
1191                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192         }
1193
1194 out:
1195         if (err) {
1196                 dev_err(adev->dev, "gfx8: Failed to load firmware %s gfx firmware\n", chip_name);
1197                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1198                 amdgpu_ucode_release(&adev->gfx.me_fw);
1199                 amdgpu_ucode_release(&adev->gfx.ce_fw);
1200                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1201                 amdgpu_ucode_release(&adev->gfx.mec_fw);
1202                 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1203         }
1204         return err;
1205 }
1206
1207 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1208                                     volatile u32 *buffer)
1209 {
1210         u32 count = 0, i;
1211         const struct cs_section_def *sect = NULL;
1212         const struct cs_extent_def *ext = NULL;
1213
1214         if (adev->gfx.rlc.cs_data == NULL)
1215                 return;
1216         if (buffer == NULL)
1217                 return;
1218
1219         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1220         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1221
1222         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1223         buffer[count++] = cpu_to_le32(0x80000000);
1224         buffer[count++] = cpu_to_le32(0x80000000);
1225
1226         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1227                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1228                         if (sect->id == SECT_CONTEXT) {
1229                                 buffer[count++] =
1230                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1231                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1232                                                 PACKET3_SET_CONTEXT_REG_START);
1233                                 for (i = 0; i < ext->reg_count; i++)
1234                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1235                         } else {
1236                                 return;
1237                         }
1238                 }
1239         }
1240
1241         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1242         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1243                         PACKET3_SET_CONTEXT_REG_START);
1244         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1245         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1246
1247         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1248         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1249
1250         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1251         buffer[count++] = cpu_to_le32(0);
1252 }
1253
1254 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1255 {
1256         if (adev->asic_type == CHIP_CARRIZO)
1257                 return 5;
1258         else
1259                 return 4;
1260 }
1261
1262 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1263 {
1264         const struct cs_section_def *cs_data;
1265         int r;
1266
1267         adev->gfx.rlc.cs_data = vi_cs_data;
1268
1269         cs_data = adev->gfx.rlc.cs_data;
1270
1271         if (cs_data) {
1272                 /* init clear state block */
1273                 r = amdgpu_gfx_rlc_init_csb(adev);
1274                 if (r)
1275                         return r;
1276         }
1277
1278         if ((adev->asic_type == CHIP_CARRIZO) ||
1279             (adev->asic_type == CHIP_STONEY)) {
1280                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1281                 r = amdgpu_gfx_rlc_init_cpt(adev);
1282                 if (r)
1283                         return r;
1284         }
1285
1286         /* init spm vmid with 0xf */
1287         if (adev->gfx.rlc.funcs->update_spm_vmid)
1288                 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
1289
1290         return 0;
1291 }
1292
1293 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1294 {
1295         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1296 }
1297
1298 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1299 {
1300         int r;
1301         u32 *hpd;
1302         size_t mec_hpd_size;
1303
1304         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1305
1306         /* take ownership of the relevant compute queues */
1307         amdgpu_gfx_compute_queue_acquire(adev);
1308
1309         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1310         if (mec_hpd_size) {
1311                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1312                                               AMDGPU_GEM_DOMAIN_VRAM |
1313                                               AMDGPU_GEM_DOMAIN_GTT,
1314                                               &adev->gfx.mec.hpd_eop_obj,
1315                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1316                                               (void **)&hpd);
1317                 if (r) {
1318                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1319                         return r;
1320                 }
1321
1322                 memset(hpd, 0, mec_hpd_size);
1323
1324                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1325                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1326         }
1327
1328         return 0;
1329 }
1330
/*
 * Pre-assembled compute shader used by gfx_v8_0_do_edc_gpr_workarounds().
 * The words are copied verbatim into the indirect buffer at vgpr_offset and
 * the VGPR dispatch points COMPUTE_PGM_LO/HI at that copy.
 *
 * NOTE(review): opaque machine code; going by the name it presumably writes
 * the vector GPRs so their EDC state is initialised -- confirm against the
 * ISA documentation before editing any word.
 */
1331 static const u32 vgpr_init_compute_shader[] =
1332 {
1333         0x7e000209, 0x7e020208,
1334         0x7e040207, 0x7e060206,
1335         0x7e080205, 0x7e0a0204,
1336         0x7e0c0203, 0x7e0e0202,
1337         0x7e100201, 0x7e120200,
1338         0x7e140209, 0x7e160208,
1339         0x7e180207, 0x7e1a0206,
1340         0x7e1c0205, 0x7e1e0204,
1341         0x7e200203, 0x7e220202,
1342         0x7e240201, 0x7e260200,
1343         0x7e280209, 0x7e2a0208,
1344         0x7e2c0207, 0x7e2e0206,
1345         0x7e300205, 0x7e320204,
1346         0x7e340203, 0x7e360202,
1347         0x7e380201, 0x7e3a0200,
1348         0x7e3c0209, 0x7e3e0208,
1349         0x7e400207, 0x7e420206,
1350         0x7e440205, 0x7e460204,
1351         0x7e480203, 0x7e4a0202,
1352         0x7e4c0201, 0x7e4e0200,
1353         0x7e500209, 0x7e520208,
1354         0x7e540207, 0x7e560206,
1355         0x7e580205, 0x7e5a0204,
1356         0x7e5c0203, 0x7e5e0202,
1357         0x7e600201, 0x7e620200,
1358         0x7e640209, 0x7e660208,
1359         0x7e680207, 0x7e6a0206,
1360         0x7e6c0205, 0x7e6e0204,
1361         0x7e700203, 0x7e720202,
1362         0x7e740201, 0x7e760200,
1363         0x7e780209, 0x7e7a0208,
1364         0x7e7c0207, 0x7e7e0206,
1365         0xbf8a0000, 0xbf810000,
1366 };
1367
/*
 * Pre-assembled compute shader used by gfx_v8_0_do_edc_gpr_workarounds().
 * It is copied verbatim into the indirect buffer at sgpr_offset and is the
 * program for both the SGPR1 and the SGPR2 dispatch.
 *
 * NOTE(review): opaque machine code; going by the name it presumably writes
 * the scalar GPRs so their EDC state is initialised -- confirm against the
 * ISA documentation before editing any word.
 */
1368 static const u32 sgpr_init_compute_shader[] =
1369 {
1370         0xbe8a0100, 0xbe8c0102,
1371         0xbe8e0104, 0xbe900106,
1372         0xbe920108, 0xbe940100,
1373         0xbe960102, 0xbe980104,
1374         0xbe9a0106, 0xbe9c0108,
1375         0xbe9e0100, 0xbea00102,
1376         0xbea20104, 0xbea40106,
1377         0xbea60108, 0xbea80100,
1378         0xbeaa0102, 0xbeac0104,
1379         0xbeae0106, 0xbeb00108,
1380         0xbeb20100, 0xbeb40102,
1381         0xbeb60104, 0xbeb80106,
1382         0xbeba0108, 0xbebc0100,
1383         0xbebe0102, 0xbec00104,
1384         0xbec20106, 0xbec40108,
1385         0xbec60100, 0xbec80102,
1386         0xbee60004, 0xbee70005,
1387         0xbeea0006, 0xbeeb0007,
1388         0xbee80008, 0xbee90009,
1389         0xbefc0000, 0xbf8a0000,
1390         0xbf810000, 0x00000000,
1391 };
1392
/*
 * Register state for the VGPR init dispatch, stored as flat
 * (register, value) pairs. gfx_v8_0_do_edc_gpr_workarounds() walks the table
 * two entries at a time and emits one SET_SH_REG packet per pair, so the
 * array length must stay even.
 */
1393 static const u32 vgpr_init_regs[] =
1394 {
1395         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1396         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1397         mmCOMPUTE_NUM_THREAD_X, 256*4,
1398         mmCOMPUTE_NUM_THREAD_Y, 1,
1399         mmCOMPUTE_NUM_THREAD_Z, 1,
1400         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1401         mmCOMPUTE_PGM_RSRC2, 20,
1402         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1403         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1404         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1405         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1406         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1407         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1408         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1409         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1410         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1411         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1412 };
1413
/*
 * Register state for the first SGPR init dispatch, stored as flat
 * (register, value) pairs that gfx_v8_0_do_edc_gpr_workarounds() turns into
 * one SET_SH_REG packet each. COMPUTE_STATIC_THREAD_MGMT_SE0 is 0x0f here;
 * sgpr2_init_regs uses 0xf0 for the second pass.
 */
1414 static const u32 sgpr1_init_regs[] =
1415 {
1416         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1417         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1418         mmCOMPUTE_NUM_THREAD_X, 256*5,
1419         mmCOMPUTE_NUM_THREAD_Y, 1,
1420         mmCOMPUTE_NUM_THREAD_Z, 1,
1421         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1422         mmCOMPUTE_PGM_RSRC2, 20,
1423         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1424         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1425         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1426         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1427         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1428         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1429         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1430         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1431         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1432         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1433 };
1434
/*
 * Register state for the second SGPR init dispatch, stored as flat
 * (register, value) pairs that gfx_v8_0_do_edc_gpr_workarounds() turns into
 * one SET_SH_REG packet each. The values match sgpr1_init_regs except for
 * COMPUTE_STATIC_THREAD_MGMT_SE0, which is 0xf0 here instead of 0x0f.
 */
1435 static const u32 sgpr2_init_regs[] =
1436 {
1437         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1438         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1439         mmCOMPUTE_NUM_THREAD_X, 256*5,
1440         mmCOMPUTE_NUM_THREAD_Y, 1,
1441         mmCOMPUTE_NUM_THREAD_Z, 1,
1442         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1443         mmCOMPUTE_PGM_RSRC2, 20,
1444         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1445         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1446         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1447         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1448         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1449         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1450         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1451         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1452         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1453         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1454 };
1455
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back,
 * one by one, after EDC has been configured; the read values are discarded
 * and the reads exist only to clear the counters.
 */
1456 static const u32 sec_ded_counter_registers[] =
1457 {
1458         mmCPC_EDC_ATC_CNT,
1459         mmCPC_EDC_SCRATCH_CNT,
1460         mmCPC_EDC_UCODE_CNT,
1461         mmCPF_EDC_ATC_CNT,
1462         mmCPF_EDC_ROQ_CNT,
1463         mmCPF_EDC_TAG_CNT,
1464         mmCPG_EDC_ATC_CNT,
1465         mmCPG_EDC_DMA_CNT,
1466         mmCPG_EDC_TAG_CNT,
1467         mmDC_EDC_CSINVOC_CNT,
1468         mmDC_EDC_RESTORE_CNT,
1469         mmDC_EDC_STATE_CNT,
1470         mmGDS_EDC_CNT,
1471         mmGDS_EDC_GRBM_CNT,
1472         mmGDS_EDC_OA_DED,
1473         mmSPI_EDC_CNT,
1474         mmSQC_ATC_EDC_GATCL1_CNT,
1475         mmSQC_EDC_CNT,
1476         mmSQ_EDC_DED_CNT,
1477         mmSQ_EDC_INFO,
1478         mmSQ_EDC_SEC_CNT,
1479         mmTCC_EDC_CNT,
1480         mmTCP_ATC_EDC_GATCL1_CNT,
1481         mmTCP_EDC_CNT,
1482         mmTD_EDC_CNT
1483 };
1484
/*
 * gfx_v8_0_do_edc_gpr_workarounds - run the GPR init shaders and enable EDC
 * @adev: amdgpu device
 *
 * Does nothing (returns 0) unless the ASIC is Carrizo and compute ring 0 is
 * ready. Otherwise it builds one indirect buffer that holds three dispatches
 * (VGPR, SGPR1, SGPR2) followed by the two shader binaries, submits it on
 * compute ring 0 and waits for its fence. On success GB_EDC_MODE and
 * CC_GC_EDC_CONFIG are programmed and the EDC counter registers are read back
 * to clear them.
 *
 * Returns 0 on success or when skipped, otherwise the error from the IB
 * allocation, the submission or the fence wait.
 *
 * NOTE(review): GB_EDC_MODE is written to 0 before the IB is built and is
 * only rewritten on the success path, so every error path leaves it at 0 --
 * confirm that this is intended.
 */
1485 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1486 {
1487         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1488         struct amdgpu_ib ib;
1489         struct dma_fence *f = NULL;
1490         int r, i;
1491         u32 tmp;
1492         unsigned total_size, vgpr_offset, sgpr_offset;
1493         u64 gpu_addr;
1494
1495         /* only supported on CZ */
1496         if (adev->asic_type != CHIP_CARRIZO)
1497                 return 0;
1498
1499         /* bail if the compute ring is not ready */
1500         if (!ring->sched.ready)
1501                 return 0;
1502
             /* remember the current GB_EDC_MODE, then clear the register;
              * the saved value is the base for the final setting below
              */
1503         tmp = RREG32(mmGB_EDC_MODE);
1504         WREG32(mmGB_EDC_MODE, 0);
1505
             /*
              * Command stream size in bytes. Per dispatch: 3 dwords for each
              * (register, value) pair, 4 for the COMPUTE_PGM_LO/HI packet,
              * 5 for DISPATCH_DIRECT and 2 for EVENT_WRITE, times 4 bytes.
              */
1506         total_size =
1507                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1508         total_size +=
1509                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1510         total_size +=
1511                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
             /*
              * The shader binaries follow the commands at 256-byte aligned
              * offsets; the program address is later written shifted right
              * by 8, so the low 8 bits of each offset must be zero.
              */
1512         total_size = ALIGN(total_size, 256);
1513         vgpr_offset = total_size;
1514         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1515         sgpr_offset = total_size;
1516         total_size += sizeof(sgpr_init_compute_shader);
1517
1518         /* allocate an indirect buffer to put the commands in */
1519         memset(&ib, 0, sizeof(ib));
1520         r = amdgpu_ib_get(adev, NULL, total_size,
1521                                         AMDGPU_IB_POOL_DIRECT, &ib);
1522         if (r) {
1523                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1524                 return r;
1525         }
1526
1527         /* load the compute shaders */
1528         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1529                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1530
1531         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1532                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1533
1534         /* init the ib length to 0 */
1535         ib.length_dw = 0;
1536
1537         /* VGPR */
1538         /* write the register state for the compute dispatch */
1539         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1540                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1541                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1542                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1543         }
1544         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1545         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1546         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1547         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1548         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1549         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1550
1551         /* write dispatch packet */
1552         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1553         ib.ptr[ib.length_dw++] = 8; /* x */
1554         ib.ptr[ib.length_dw++] = 1; /* y */
1555         ib.ptr[ib.length_dw++] = 1; /* z */
1556         ib.ptr[ib.length_dw++] =
1557                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1558
1559         /* write CS partial flush packet */
1560         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1561         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1562
1563         /* SGPR1 */
1564         /* write the register state for the compute dispatch */
1565         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1566                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1567                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1568                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1569         }
1570         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1571         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1572         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1573         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1574         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1575         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1576
1577         /* write dispatch packet */
1578         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1579         ib.ptr[ib.length_dw++] = 8; /* x */
1580         ib.ptr[ib.length_dw++] = 1; /* y */
1581         ib.ptr[ib.length_dw++] = 1; /* z */
1582         ib.ptr[ib.length_dw++] =
1583                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1584
1585         /* write CS partial flush packet */
1586         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1587         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1588
1589         /* SGPR2 */
1590         /* write the register state for the compute dispatch */
1591         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1592                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1593                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1594                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1595         }
1596         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
             /* same SGPR shader as the SGPR1 pass; only the register table differs */
1597         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1598         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1599         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1600         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1601         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1602
1603         /* write dispatch packet */
1604         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1605         ib.ptr[ib.length_dw++] = 8; /* x */
1606         ib.ptr[ib.length_dw++] = 1; /* y */
1607         ib.ptr[ib.length_dw++] = 1; /* z */
1608         ib.ptr[ib.length_dw++] =
1609                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1610
1611         /* write CS partial flush packet */
1612         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1613         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1614
1615         /* schedule the ib on the ring */
1616         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1617         if (r) {
1618                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1619                 goto fail;
1620         }
1621
1622         /* wait for the GPU to finish processing the IB */
1623         r = dma_fence_wait(f, false);
1624         if (r) {
1625                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1626                 goto fail;
1627         }
1628
             /* rebuild GB_EDC_MODE from the value saved at entry */
1629         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1630         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1631         WREG32(mmGB_EDC_MODE, tmp);
1632
             /* clear DIS_EDC and additionally force bit 0 of the register on */
1633         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1634         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1635         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1636
1637
1638         /* read back registers to clear the counters */
1639         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1640                 RREG32(sec_ded_counter_registers[i]);
1641
             /* reached on success as well: the IB and fence are always released */
1642 fail:
1643         amdgpu_ib_free(adev, &ib, NULL);
1644         dma_fence_put(f);
1645
1646         return r;
1647 }
1648
/*
 * gfx_v8_0_gpu_early_init - fill in the static gfx configuration
 * @adev: amdgpu device
 *
 * Populates adev->gfx.config with per-ASIC limits and FIFO sizes, derives the
 * memory row size from memory-controller registers, and folds that row size
 * into the ASIC's golden GB_ADDR_CONFIG value, which is then stored in
 * adev->gfx.config.gb_addr_config.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info() on
 * the Polaris/VegaM paths.
 */
1649 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1650 {
1651         u32 gb_addr_config;
1652         u32 mc_arb_ramcfg;
1653         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1654         u32 tmp;
1655         int ret;
1656
             /* per-ASIC limits; unknown ASICs fall through to the default case */
1657         switch (adev->asic_type) {
1658         case CHIP_TOPAZ:
1659                 adev->gfx.config.max_shader_engines = 1;
1660                 adev->gfx.config.max_tile_pipes = 2;
1661                 adev->gfx.config.max_cu_per_sh = 6;
1662                 adev->gfx.config.max_sh_per_se = 1;
1663                 adev->gfx.config.max_backends_per_se = 2;
1664                 adev->gfx.config.max_texture_channel_caches = 2;
1665                 adev->gfx.config.max_gprs = 256;
1666                 adev->gfx.config.max_gs_threads = 32;
1667                 adev->gfx.config.max_hw_contexts = 8;
1668
1669                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1670                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1671                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1672                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1673                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1674                 break;
1675         case CHIP_FIJI:
1676                 adev->gfx.config.max_shader_engines = 4;
1677                 adev->gfx.config.max_tile_pipes = 16;
1678                 adev->gfx.config.max_cu_per_sh = 16;
1679                 adev->gfx.config.max_sh_per_se = 1;
1680                 adev->gfx.config.max_backends_per_se = 4;
1681                 adev->gfx.config.max_texture_channel_caches = 16;
1682                 adev->gfx.config.max_gprs = 256;
1683                 adev->gfx.config.max_gs_threads = 32;
1684                 adev->gfx.config.max_hw_contexts = 8;
1685
1686                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1687                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1688                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1689                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1690                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1691                 break;
1692         case CHIP_POLARIS11:
1693         case CHIP_POLARIS12:
                     /*
                      * No shader-engine/pipe/CU limits are hard-coded for
                      * this case; presumably amdgpu_atombios_get_gfx_info()
                      * supplies them from the VBIOS -- TODO confirm.
                      */
1694                 ret = amdgpu_atombios_get_gfx_info(adev);
1695                 if (ret)
1696                         return ret;
1697                 adev->gfx.config.max_gprs = 256;
1698                 adev->gfx.config.max_gs_threads = 32;
1699                 adev->gfx.config.max_hw_contexts = 8;
1700
1701                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1702                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1703                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1704                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1705                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1706                 break;
1707         case CHIP_POLARIS10:
1708         case CHIP_VEGAM:
                     /* same handling as Polaris11/12 apart from the golden value */
1709                 ret = amdgpu_atombios_get_gfx_info(adev);
1710                 if (ret)
1711                         return ret;
1712                 adev->gfx.config.max_gprs = 256;
1713                 adev->gfx.config.max_gs_threads = 32;
1714                 adev->gfx.config.max_hw_contexts = 8;
1715
1716                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721                 break;
1722         case CHIP_TONGA:
1723                 adev->gfx.config.max_shader_engines = 4;
1724                 adev->gfx.config.max_tile_pipes = 8;
1725                 adev->gfx.config.max_cu_per_sh = 8;
1726                 adev->gfx.config.max_sh_per_se = 1;
1727                 adev->gfx.config.max_backends_per_se = 2;
1728                 adev->gfx.config.max_texture_channel_caches = 8;
1729                 adev->gfx.config.max_gprs = 256;
1730                 adev->gfx.config.max_gs_threads = 32;
1731                 adev->gfx.config.max_hw_contexts = 8;
1732
1733                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1738                 break;
1739         case CHIP_CARRIZO:
1740                 adev->gfx.config.max_shader_engines = 1;
1741                 adev->gfx.config.max_tile_pipes = 2;
1742                 adev->gfx.config.max_sh_per_se = 1;
1743                 adev->gfx.config.max_backends_per_se = 2;
1744                 adev->gfx.config.max_cu_per_sh = 8;
1745                 adev->gfx.config.max_texture_channel_caches = 2;
1746                 adev->gfx.config.max_gprs = 256;
1747                 adev->gfx.config.max_gs_threads = 32;
1748                 adev->gfx.config.max_hw_contexts = 8;
1749
1750                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1755                 break;
1756         case CHIP_STONEY:
1757                 adev->gfx.config.max_shader_engines = 1;
1758                 adev->gfx.config.max_tile_pipes = 2;
1759                 adev->gfx.config.max_sh_per_se = 1;
1760                 adev->gfx.config.max_backends_per_se = 1;
1761                 adev->gfx.config.max_cu_per_sh = 3;
1762                 adev->gfx.config.max_texture_channel_caches = 2;
1763                 adev->gfx.config.max_gprs = 256;
1764                 adev->gfx.config.max_gs_threads = 16;
1765                 adev->gfx.config.max_hw_contexts = 8;
1766
1767                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1772                 break;
1773         default:
1774                 adev->gfx.config.max_shader_engines = 2;
1775                 adev->gfx.config.max_tile_pipes = 4;
1776                 adev->gfx.config.max_cu_per_sh = 2;
1777                 adev->gfx.config.max_sh_per_se = 1;
1778                 adev->gfx.config.max_backends_per_se = 2;
1779                 adev->gfx.config.max_texture_channel_caches = 4;
1780                 adev->gfx.config.max_gprs = 256;
1781                 adev->gfx.config.max_gs_threads = 32;
1782                 adev->gfx.config.max_hw_contexts = 8;
1783
1784                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1789                 break;
1790         }
1791
             /* cache MC_ARB_RAMCFG and decode the bank/rank fields from it */
1792         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1793         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1794
1795         adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1796                                 MC_ARB_RAMCFG, NOOFBANK);
1797         adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1798                                 MC_ARB_RAMCFG, NOOFRANKS);
1799
1800         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1801         adev->gfx.config.mem_max_burst_length_bytes = 256;
1802         if (adev->flags & AMD_IS_APU) {
1803                 /* Get memory bank mapping mode. */
1804                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1805                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1807
1808                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1809                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1810                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1811
1812                 /* Validate settings in case only one DIMM installed. */
                     /* map values 0, 3, 4 and anything above 12 are normalised to 0 */
1813                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1814                         dimm00_addr_map = 0;
1815                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1816                         dimm01_addr_map = 0;
1817                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1818                         dimm10_addr_map = 0;
1819                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1820                         dimm11_addr_map = 0;
1821
1822                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1823                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
1824                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1825                         adev->gfx.config.mem_row_size_in_kb = 2;
1826                 else
1827                         adev->gfx.config.mem_row_size_in_kb = 1;
1828         } else {
                     /* row size in KB = 4 * 2^(8 + NOOFCOLS) / 1024, capped at 4 */
1829                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1830                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1831                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1832                         adev->gfx.config.mem_row_size_in_kb = 4;
1833         }
1834
1835         adev->gfx.config.shader_engine_tile_size = 32;
1836         adev->gfx.config.num_gpus = 1;
1837         adev->gfx.config.multi_gpu_tile_size = 64;
1838
1839         /* fix up row size */
             /* 1 KB (and any unexpected value) -> 0, 2 KB -> 1, 4 KB -> 2 */
1840         switch (adev->gfx.config.mem_row_size_in_kb) {
1841         case 1:
1842         default:
1843                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1844                 break;
1845         case 2:
1846                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1847                 break;
1848         case 4:
1849                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1850                 break;
1851         }
1852         adev->gfx.config.gb_addr_config = gb_addr_config;
1853
1854         return 0;
1855 }
1856
1857 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1858                                         int mec, int pipe, int queue)
1859 {
1860         int r;
1861         unsigned irq_type;
1862         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1863         unsigned int hw_prio;
1864
1865         ring = &adev->gfx.compute_ring[ring_id];
1866
1867         /* mec0 is me1 */
1868         ring->me = mec + 1;
1869         ring->pipe = pipe;
1870         ring->queue = queue;
1871
1872         ring->ring_obj = NULL;
1873         ring->use_doorbell = true;
1874         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1875         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1876                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1877         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1878
1879         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1880                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1881                 + ring->pipe;
1882
1883         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1884                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1885         /* type-2 packets are deprecated on MEC, use type-3 instead */
1886         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1887                              hw_prio, NULL);
1888         if (r)
1889                 return r;
1890
1891
1892         return 0;
1893 }
1894
1895 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1896
/*
 * gfx_v8_0_sw_init - software-side setup of the GFX v8 IP block
 * @ip_block: IP block being initialised; its ->adev is the device
 *
 * Chooses the MEC topology for the ASIC, registers the CP and SQ interrupt
 * sources, loads the gfx microcode, creates the RLC and MEC buffers, sets up
 * the gfx and compute rings, the KIQ and the MQDs, and finally fills in the
 * gfx configuration via gfx_v8_0_gpu_early_init().
 *
 * Returns 0 on success or the first error encountered. Every error path
 * returns immediately without undoing the earlier steps of this function.
 */
1897 static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
1898 {
1899         int i, j, k, r, ring_id;
1900         int xcc_id = 0;
1901         struct amdgpu_ring *ring;
1902         struct amdgpu_device *adev = ip_block->adev;
1903
             /* two MECs on the listed ASICs, a single MEC everywhere else */
1904         switch (adev->asic_type) {
1905         case CHIP_TONGA:
1906         case CHIP_CARRIZO:
1907         case CHIP_FIJI:
1908         case CHIP_POLARIS10:
1909         case CHIP_POLARIS11:
1910         case CHIP_POLARIS12:
1911         case CHIP_VEGAM:
1912                 adev->gfx.mec.num_mec = 2;
1913                 break;
1914         case CHIP_TOPAZ:
1915         case CHIP_STONEY:
1916         default:
1917                 adev->gfx.mec.num_mec = 1;
1918                 break;
1919         }
1920
1921         adev->gfx.mec.num_pipe_per_mec = 4;
1922         adev->gfx.mec.num_queue_per_pipe = 8;
1923
1924         /* EOP Event */
1925         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1926         if (r)
1927                 return r;
1928
1929         /* Privileged reg */
1930         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1931                               &adev->gfx.priv_reg_irq);
1932         if (r)
1933                 return r;
1934
1935         /* Privileged inst */
1936         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1937                               &adev->gfx.priv_inst_irq);
1938         if (r)
1939                 return r;
1940
1941         /* Add CP EDC/ECC irq  */
1942         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1943                               &adev->gfx.cp_ecc_error_irq);
1944         if (r)
1945                 return r;
1946
1947         /* SQ interrupts. */
1948         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1949                               &adev->gfx.sq_irq);
1950         if (r) {
1951                 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1952                 return r;
1953         }
1954
             /* work item that runs gfx_v8_0_sq_irq_work_func() */
1955         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1956
1957         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1958
1959         r = gfx_v8_0_init_microcode(adev);
1960         if (r) {
1961                 DRM_ERROR("Failed to load gfx firmware!\n");
1962                 return r;
1963         }
1964
1965         r = adev->gfx.rlc.funcs->init(adev);
1966         if (r) {
1967                 DRM_ERROR("Failed to init rlc BOs!\n");
1968                 return r;
1969         }
1970
1971         r = gfx_v8_0_mec_init(adev);
1972         if (r) {
1973                 DRM_ERROR("Failed to init MEC BOs!\n");
1974                 return r;
1975         }
1976
1977         /* set up the gfx ring */
1978         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1979                 ring = &adev->gfx.gfx_ring[i];
1980                 ring->ring_obj = NULL;
1981                 sprintf(ring->name, "gfx");
1982                 /* no gfx doorbells on iceland */
1983                 if (adev->asic_type != CHIP_TOPAZ) {
1984                         ring->use_doorbell = true;
1985                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1986                 }
1987
1988                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1989                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
1990                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
1991                 if (r)
1992                         return r;
1993         }
1994
1995
1996         /* set up the compute queues - allocate horizontally across pipes */
             /*
              * i = MEC, j = queue, k = pipe. The pipe loop is innermost, so
              * consecutive ring ids land on different pipes before moving on
              * to the next queue index.
              */
1997         ring_id = 0;
1998         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1999                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2000                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2001                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2002                                                                      k, j))
2003                                         continue;
2004
2005                                 r = gfx_v8_0_compute_ring_init(adev,
2006                                                                 ring_id,
2007                                                                 i, k, j);
2008                                 if (r)
2009                                         return r;
2010
2011                                 ring_id++;
2012                         }
2013                 }
2014         }
2015
2016         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
2017         if (r) {
2018                 DRM_ERROR("Failed to init KIQ BOs!\n");
2019                 return r;
2020         }
2021
2022         r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2023         if (r)
2024                 return r;
2025
2026         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2027         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
2028         if (r)
2029                 return r;
2030
2031         adev->gfx.ce_ram_size = 0x8000;
2032
2033         r = gfx_v8_0_gpu_early_init(adev);
2034         if (r)
2035                 return r;
2036
2037         return 0;
2038 }
2039
2040 static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
2041 {
2042         struct amdgpu_device *adev = ip_block->adev;
2043         int i;
2044
2045         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2046                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2047         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2048                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2049
2050         amdgpu_gfx_mqd_sw_fini(adev, 0);
2051         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2052         amdgpu_gfx_kiq_fini(adev, 0);
2053
2054         gfx_v8_0_mec_fini(adev);
2055         amdgpu_gfx_rlc_fini(adev);
2056         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2057                                 &adev->gfx.rlc.clear_state_gpu_addr,
2058                                 (void **)&adev->gfx.rlc.cs_ptr);
2059         if ((adev->asic_type == CHIP_CARRIZO) ||
2060             (adev->asic_type == CHIP_STONEY)) {
2061                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2062                                 &adev->gfx.rlc.cp_table_gpu_addr,
2063                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2064         }
2065         gfx_v8_0_free_microcode(adev);
2066
2067         return 0;
2068 }
2069
2070 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2071 {
2072         uint32_t *modearray, *mod2array;
2073         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2074         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2075         u32 reg_offset;
2076
2077         modearray = adev->gfx.config.tile_mode_array;
2078         mod2array = adev->gfx.config.macrotile_mode_array;
2079
2080         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2081                 modearray[reg_offset] = 0;
2082
2083         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2084                 mod2array[reg_offset] = 0;
2085
2086         switch (adev->asic_type) {
2087         case CHIP_TOPAZ:
2088                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089                                 PIPE_CONFIG(ADDR_SURF_P2) |
2090                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2092                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2093                                 PIPE_CONFIG(ADDR_SURF_P2) |
2094                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2096                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097                                 PIPE_CONFIG(ADDR_SURF_P2) |
2098                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2100                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2101                                 PIPE_CONFIG(ADDR_SURF_P2) |
2102                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2104                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105                                 PIPE_CONFIG(ADDR_SURF_P2) |
2106                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2108                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109                                 PIPE_CONFIG(ADDR_SURF_P2) |
2110                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2112                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2113                                 PIPE_CONFIG(ADDR_SURF_P2) |
2114                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2116                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2117                                 PIPE_CONFIG(ADDR_SURF_P2));
2118                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2121                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2122                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                  PIPE_CONFIG(ADDR_SURF_P2) |
2124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2126                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2127                                  PIPE_CONFIG(ADDR_SURF_P2) |
2128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2130                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                  PIPE_CONFIG(ADDR_SURF_P2) |
2132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                  PIPE_CONFIG(ADDR_SURF_P2) |
2136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2139                                  PIPE_CONFIG(ADDR_SURF_P2) |
2140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                                  PIPE_CONFIG(ADDR_SURF_P2) |
2144                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2145                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2146                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2147                                  PIPE_CONFIG(ADDR_SURF_P2) |
2148                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2149                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2150                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2151                                  PIPE_CONFIG(ADDR_SURF_P2) |
2152                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2153                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2154                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2155                                  PIPE_CONFIG(ADDR_SURF_P2) |
2156                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2157                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2158                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2159                                  PIPE_CONFIG(ADDR_SURF_P2) |
2160                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2161                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2162                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2163                                  PIPE_CONFIG(ADDR_SURF_P2) |
2164                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2165                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2166                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2167                                  PIPE_CONFIG(ADDR_SURF_P2) |
2168                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2169                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2170                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2174                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2178                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190
2191                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2194                                 NUM_BANKS(ADDR_SURF_8_BANK));
2195                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2196                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198                                 NUM_BANKS(ADDR_SURF_8_BANK));
2199                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202                                 NUM_BANKS(ADDR_SURF_8_BANK));
2203                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2205                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2206                                 NUM_BANKS(ADDR_SURF_8_BANK));
2207                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2209                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210                                 NUM_BANKS(ADDR_SURF_8_BANK));
2211                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2214                                 NUM_BANKS(ADDR_SURF_8_BANK));
2215                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2217                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2218                                 NUM_BANKS(ADDR_SURF_8_BANK));
2219                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2220                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2221                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2222                                 NUM_BANKS(ADDR_SURF_16_BANK));
2223                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2224                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2225                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2226                                 NUM_BANKS(ADDR_SURF_16_BANK));
2227                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2228                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2230                                  NUM_BANKS(ADDR_SURF_16_BANK));
2231                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2232                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2233                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234                                  NUM_BANKS(ADDR_SURF_16_BANK));
2235                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2237                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238                                  NUM_BANKS(ADDR_SURF_16_BANK));
2239                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2241                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242                                  NUM_BANKS(ADDR_SURF_16_BANK));
2243                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                  NUM_BANKS(ADDR_SURF_8_BANK));
2247
2248                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2249                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2250                             reg_offset != 23)
2251                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2252
2253                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2254                         if (reg_offset != 7)
2255                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2256
2257                 break;
2258         case CHIP_FIJI:
2259         case CHIP_VEGAM:
2260                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2274                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2281                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2282                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2284                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2285                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2286                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2287                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2288                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2289                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2290                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2292                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2293                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2294                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2306                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2331                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2335                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2339                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2343                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2346                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2347                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2350                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2351                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2354                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2355                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2358                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2359                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2362                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2363                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2366                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2370                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2375                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2378                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2382
2383                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                                 NUM_BANKS(ADDR_SURF_8_BANK));
2387                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390                                 NUM_BANKS(ADDR_SURF_8_BANK));
2391                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2394                                 NUM_BANKS(ADDR_SURF_8_BANK));
2395                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2397                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2398                                 NUM_BANKS(ADDR_SURF_8_BANK));
2399                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402                                 NUM_BANKS(ADDR_SURF_8_BANK));
2403                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406                                 NUM_BANKS(ADDR_SURF_8_BANK));
2407                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410                                 NUM_BANKS(ADDR_SURF_8_BANK));
2411                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2413                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414                                 NUM_BANKS(ADDR_SURF_8_BANK));
2415                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2417                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418                                 NUM_BANKS(ADDR_SURF_8_BANK));
2419                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2421                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2422                                  NUM_BANKS(ADDR_SURF_8_BANK));
2423                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2425                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2426                                  NUM_BANKS(ADDR_SURF_8_BANK));
2427                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2429                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430                                  NUM_BANKS(ADDR_SURF_8_BANK));
2431                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434                                  NUM_BANKS(ADDR_SURF_8_BANK));
2435                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438                                  NUM_BANKS(ADDR_SURF_4_BANK));
2439
2440                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2441                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2442
2443                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2444                         if (reg_offset != 7)
2445                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2446
2447                 break;
2448         case CHIP_TONGA:
2449                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2451                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2453                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2455                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2456                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2457                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2459                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2460                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2461                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2463                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2464                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2465                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2468                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2469                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2470                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2471                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2472                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2473                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2474                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2475                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2476                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2477                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2478                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2479                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2480                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2481                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2482                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2483                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2486                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2495                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2499                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2511                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2514                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2516                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2518                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2519                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2520                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2523                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2524                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2526                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2527                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2528                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2530                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2531                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2532                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2534                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2535                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2536                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2538                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2539                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2540                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2541                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2542                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2543                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2544                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2546                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2547                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2548                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2550                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2551                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2552                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2554                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2555                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2558                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2562                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2564                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2566                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2567                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2569                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2570                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2571
2572                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2574                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2575                                 NUM_BANKS(ADDR_SURF_16_BANK));
2576                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2579                                 NUM_BANKS(ADDR_SURF_16_BANK));
2580                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2583                                 NUM_BANKS(ADDR_SURF_16_BANK));
2584                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2587                                 NUM_BANKS(ADDR_SURF_16_BANK));
2588                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2591                                 NUM_BANKS(ADDR_SURF_16_BANK));
2592                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2607                                 NUM_BANKS(ADDR_SURF_16_BANK));
2608                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2610                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2611                                  NUM_BANKS(ADDR_SURF_16_BANK));
2612                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2615                                  NUM_BANKS(ADDR_SURF_16_BANK));
2616                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619                                  NUM_BANKS(ADDR_SURF_8_BANK));
2620                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2623                                  NUM_BANKS(ADDR_SURF_4_BANK));
2624                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2627                                  NUM_BANKS(ADDR_SURF_4_BANK));
2628
2629                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2630                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2631
2632                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2633                         if (reg_offset != 7)
2634                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2635
2636                 break;
2637         case CHIP_POLARIS11:
2638         case CHIP_POLARIS12:
2639                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2654                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2655                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2658                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2659                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2660                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2662                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2663                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2664                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2666                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2670                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2671                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2672                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2673                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2680                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2684                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2688                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2704                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2705                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2708                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2709                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2712                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2716                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2728                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2729                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2730                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2732                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2733                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2737                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2741                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2742                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2745                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2746                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2757                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2761
2762                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765                                 NUM_BANKS(ADDR_SURF_16_BANK));
2766
2767                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770                                 NUM_BANKS(ADDR_SURF_16_BANK));
2771
2772                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2774                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775                                 NUM_BANKS(ADDR_SURF_16_BANK));
2776
2777                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780                                 NUM_BANKS(ADDR_SURF_16_BANK));
2781
2782                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2785                                 NUM_BANKS(ADDR_SURF_16_BANK));
2786
2787                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2790                                 NUM_BANKS(ADDR_SURF_16_BANK));
2791
2792                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2795                                 NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2798                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2799                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800                                 NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805                                 NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825                                 NUM_BANKS(ADDR_SURF_8_BANK));
2826
2827                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2830                                 NUM_BANKS(ADDR_SURF_4_BANK));
2831
2832                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2833                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2834
2835                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2836                         if (reg_offset != 7)
2837                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2838
2839                 break;
2840         case CHIP_POLARIS10:
2841                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2843                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2845                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2849                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2853                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2857                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2859                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2861                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2862                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2865                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2869                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2873                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2874                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2875                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2887                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2891                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2899                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2907                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2919                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2922                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2923                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2924                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2926                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2927                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2930                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2931                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2932                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2934                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2935                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2939                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2943                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2947                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2948                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2959                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2963
2964                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2971                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972                                 NUM_BANKS(ADDR_SURF_16_BANK));
2973
2974                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2976                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                 NUM_BANKS(ADDR_SURF_16_BANK));
2978
2979                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983
2984                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                 NUM_BANKS(ADDR_SURF_16_BANK));
2988
2989                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2991                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2992                                 NUM_BANKS(ADDR_SURF_16_BANK));
2993
2994                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2996                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2997                                 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3012                                 NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                                 NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                                 NUM_BANKS(ADDR_SURF_8_BANK));
3023
3024                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027                                 NUM_BANKS(ADDR_SURF_4_BANK));
3028
3029                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3032                                 NUM_BANKS(ADDR_SURF_4_BANK));
3033
3034                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3035                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3036
3037                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3038                         if (reg_offset != 7)
3039                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3040
3041                 break;
3042         case CHIP_STONEY:
3043                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P2) |
3045                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P2) |
3049                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052                                 PIPE_CONFIG(ADDR_SURF_P2) |
3053                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3056                                 PIPE_CONFIG(ADDR_SURF_P2) |
3057                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3058                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3059                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3060                                 PIPE_CONFIG(ADDR_SURF_P2) |
3061                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3063                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3064                                 PIPE_CONFIG(ADDR_SURF_P2) |
3065                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3066                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3067                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3068                                 PIPE_CONFIG(ADDR_SURF_P2) |
3069                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3071                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3072                                 PIPE_CONFIG(ADDR_SURF_P2));
3073                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                  PIPE_CONFIG(ADDR_SURF_P2) |
3079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082                                  PIPE_CONFIG(ADDR_SURF_P2) |
3083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3085                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3086                                  PIPE_CONFIG(ADDR_SURF_P2) |
3087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3089                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                  PIPE_CONFIG(ADDR_SURF_P2) |
3091                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3094                                  PIPE_CONFIG(ADDR_SURF_P2) |
3095                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3097                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                                  PIPE_CONFIG(ADDR_SURF_P2) |
3099                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3100                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3101                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3102                                  PIPE_CONFIG(ADDR_SURF_P2) |
3103                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3104                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3106                                  PIPE_CONFIG(ADDR_SURF_P2) |
3107                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3110                                  PIPE_CONFIG(ADDR_SURF_P2) |
3111                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3114                                  PIPE_CONFIG(ADDR_SURF_P2) |
3115                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3118                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3120                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3122                                  PIPE_CONFIG(ADDR_SURF_P2) |
3123                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3126                                  PIPE_CONFIG(ADDR_SURF_P2) |
3127                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3128                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3130                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3132                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3133                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134                                  PIPE_CONFIG(ADDR_SURF_P2) |
3135                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145
3146                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3148                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149                                 NUM_BANKS(ADDR_SURF_8_BANK));
3150                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3152                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153                                 NUM_BANKS(ADDR_SURF_8_BANK));
3154                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3156                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157                                 NUM_BANKS(ADDR_SURF_8_BANK));
3158                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3161                                 NUM_BANKS(ADDR_SURF_8_BANK));
3162                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165                                 NUM_BANKS(ADDR_SURF_8_BANK));
3166                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3168                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3169                                 NUM_BANKS(ADDR_SURF_8_BANK));
3170                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173                                 NUM_BANKS(ADDR_SURF_8_BANK));
3174                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3175                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3176                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177                                 NUM_BANKS(ADDR_SURF_16_BANK));
3178                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3181                                 NUM_BANKS(ADDR_SURF_16_BANK));
3182                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3183                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185                                  NUM_BANKS(ADDR_SURF_16_BANK));
3186                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3187                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3188                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189                                  NUM_BANKS(ADDR_SURF_16_BANK));
3190                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3192                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193                                  NUM_BANKS(ADDR_SURF_16_BANK));
3194                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3197                                  NUM_BANKS(ADDR_SURF_16_BANK));
3198                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201                                  NUM_BANKS(ADDR_SURF_8_BANK));
3202
3203                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3204                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3205                             reg_offset != 23)
3206                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3207
3208                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3209                         if (reg_offset != 7)
3210                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3211
3212                 break;
3213         default:
3214                 dev_warn(adev->dev,
3215                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3216                          adev->asic_type);
3217                 fallthrough;
3218
3219         case CHIP_CARRIZO:
3220                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221                                 PIPE_CONFIG(ADDR_SURF_P2) |
3222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225                                 PIPE_CONFIG(ADDR_SURF_P2) |
3226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229                                 PIPE_CONFIG(ADDR_SURF_P2) |
3230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233                                 PIPE_CONFIG(ADDR_SURF_P2) |
3234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237                                 PIPE_CONFIG(ADDR_SURF_P2) |
3238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3241                                 PIPE_CONFIG(ADDR_SURF_P2) |
3242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3245                                 PIPE_CONFIG(ADDR_SURF_P2) |
3246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3249                                 PIPE_CONFIG(ADDR_SURF_P2));
3250                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3254                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                  PIPE_CONFIG(ADDR_SURF_P2) |
3256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259                                  PIPE_CONFIG(ADDR_SURF_P2) |
3260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3262                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263                                  PIPE_CONFIG(ADDR_SURF_P2) |
3264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                                  PIPE_CONFIG(ADDR_SURF_P2) |
3268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3271                                  PIPE_CONFIG(ADDR_SURF_P2) |
3272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                                  PIPE_CONFIG(ADDR_SURF_P2) |
3276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3278                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3279                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3283                                  PIPE_CONFIG(ADDR_SURF_P2) |
3284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3287                                  PIPE_CONFIG(ADDR_SURF_P2) |
3288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3291                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3295                                  PIPE_CONFIG(ADDR_SURF_P2) |
3296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3318                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3322
3323                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326                                 NUM_BANKS(ADDR_SURF_8_BANK));
3327                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330                                 NUM_BANKS(ADDR_SURF_8_BANK));
3331                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334                                 NUM_BANKS(ADDR_SURF_8_BANK));
3335                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338                                 NUM_BANKS(ADDR_SURF_8_BANK));
3339                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342                                 NUM_BANKS(ADDR_SURF_8_BANK));
3343                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3346                                 NUM_BANKS(ADDR_SURF_8_BANK));
3347                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350                                 NUM_BANKS(ADDR_SURF_8_BANK));
3351                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354                                 NUM_BANKS(ADDR_SURF_16_BANK));
3355                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358                                 NUM_BANKS(ADDR_SURF_16_BANK));
3359                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3360                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3361                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362                                  NUM_BANKS(ADDR_SURF_16_BANK));
3363                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366                                  NUM_BANKS(ADDR_SURF_16_BANK));
3367                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370                                  NUM_BANKS(ADDR_SURF_16_BANK));
3371                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374                                  NUM_BANKS(ADDR_SURF_16_BANK));
3375                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378                                  NUM_BANKS(ADDR_SURF_8_BANK));
3379
3380                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3381                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3382                             reg_offset != 23)
3383                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3384
3385                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3386                         if (reg_offset != 7)
3387                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3388
3389                 break;
3390         }
3391 }
3392
3393 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3394                                   u32 se_num, u32 sh_num, u32 instance,
3395                                   int xcc_id)
3396 {
3397         u32 data;
3398
3399         if (instance == 0xffffffff)
3400                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3401         else
3402                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3403
3404         if (se_num == 0xffffffff)
3405                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3406         else
3407                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3408
3409         if (sh_num == 0xffffffff)
3410                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3411         else
3412                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3413
3414         WREG32(mmGRBM_GFX_INDEX, data);
3415 }
3416
3417 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3418                                   u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3419 {
3420         vi_srbm_select(adev, me, pipe, q, vm);
3421 }
3422
3423 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3424 {
3425         u32 data, mask;
3426
3427         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3428                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3429
3430         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3431
3432         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3433                                          adev->gfx.config.max_sh_per_se);
3434
3435         return (~data) & mask;
3436 }
3437
3438 static void
3439 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3440 {
3441         switch (adev->asic_type) {
3442         case CHIP_FIJI:
3443         case CHIP_VEGAM:
3444                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3445                           RB_XSEL2(1) | PKR_MAP(2) |
3446                           PKR_XSEL(1) | PKR_YSEL(1) |
3447                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3448                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3449                            SE_PAIR_YSEL(2);
3450                 break;
3451         case CHIP_TONGA:
3452         case CHIP_POLARIS10:
3453                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3454                           SE_XSEL(1) | SE_YSEL(1);
3455                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3456                            SE_PAIR_YSEL(2);
3457                 break;
3458         case CHIP_TOPAZ:
3459         case CHIP_CARRIZO:
3460                 *rconf |= RB_MAP_PKR0(2);
3461                 *rconf1 |= 0x0;
3462                 break;
3463         case CHIP_POLARIS11:
3464         case CHIP_POLARIS12:
3465                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3466                           SE_XSEL(1) | SE_YSEL(1);
3467                 *rconf1 |= 0x0;
3468                 break;
3469         case CHIP_STONEY:
3470                 *rconf |= 0x0;
3471                 *rconf1 |= 0x0;
3472                 break;
3473         default:
3474                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3475                 break;
3476         }
3477 }
3478
3479 static void
3480 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3481                                         u32 raster_config, u32 raster_config_1,
3482                                         unsigned rb_mask, unsigned num_rb)
3483 {
3484         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3485         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3486         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3487         unsigned rb_per_se = num_rb / num_se;
3488         unsigned se_mask[4];
3489         unsigned se;
3490
3491         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3492         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3493         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3494         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3495
3496         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3497         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3498         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3499
3500         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3501                              (!se_mask[2] && !se_mask[3]))) {
3502                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3503
3504                 if (!se_mask[0] && !se_mask[1]) {
3505                         raster_config_1 |=
3506                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3507                 } else {
3508                         raster_config_1 |=
3509                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3510                 }
3511         }
3512
3513         for (se = 0; se < num_se; se++) {
3514                 unsigned raster_config_se = raster_config;
3515                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3516                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3517                 int idx = (se / 2) * 2;
3518
3519                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3520                         raster_config_se &= ~SE_MAP_MASK;
3521
3522                         if (!se_mask[idx]) {
3523                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3524                         } else {
3525                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3526                         }
3527                 }
3528
3529                 pkr0_mask &= rb_mask;
3530                 pkr1_mask &= rb_mask;
3531                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3532                         raster_config_se &= ~PKR_MAP_MASK;
3533
3534                         if (!pkr0_mask) {
3535                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3536                         } else {
3537                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3538                         }
3539                 }
3540
3541                 if (rb_per_se >= 2) {
3542                         unsigned rb0_mask = 1 << (se * rb_per_se);
3543                         unsigned rb1_mask = rb0_mask << 1;
3544
3545                         rb0_mask &= rb_mask;
3546                         rb1_mask &= rb_mask;
3547                         if (!rb0_mask || !rb1_mask) {
3548                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3549
3550                                 if (!rb0_mask) {
3551                                         raster_config_se |=
3552                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3553                                 } else {
3554                                         raster_config_se |=
3555                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3556                                 }
3557                         }
3558
3559                         if (rb_per_se > 2) {
3560                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3561                                 rb1_mask = rb0_mask << 1;
3562                                 rb0_mask &= rb_mask;
3563                                 rb1_mask &= rb_mask;
3564                                 if (!rb0_mask || !rb1_mask) {
3565                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3566
3567                                         if (!rb0_mask) {
3568                                                 raster_config_se |=
3569                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3570                                         } else {
3571                                                 raster_config_se |=
3572                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3573                                         }
3574                                 }
3575                         }
3576                 }
3577
3578                 /* GRBM_GFX_INDEX has a different offset on VI */
3579                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3580                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3581                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3582         }
3583
3584         /* GRBM_GFX_INDEX has a different offset on VI */
3585         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3586 }
3587
3588 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3589 {
3590         int i, j;
3591         u32 data;
3592         u32 raster_config = 0, raster_config_1 = 0;
3593         u32 active_rbs = 0;
3594         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3595                                         adev->gfx.config.max_sh_per_se;
3596         unsigned num_rb_pipes;
3597
3598         mutex_lock(&adev->grbm_idx_mutex);
3599         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3600                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3601                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3602                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3603                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3604                                                rb_bitmap_width_per_sh);
3605                 }
3606         }
3607         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3608
3609         adev->gfx.config.backend_enable_mask = active_rbs;
3610         adev->gfx.config.num_rbs = hweight32(active_rbs);
3611
3612         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3613                              adev->gfx.config.max_shader_engines, 16);
3614
3615         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3616
3617         if (!adev->gfx.config.backend_enable_mask ||
3618                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3619                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3620                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3621         } else {
3622                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3623                                                         adev->gfx.config.backend_enable_mask,
3624                                                         num_rb_pipes);
3625         }
3626
3627         /* cache the values for userspace */
3628         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3631                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3632                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3633                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3634                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3635                         adev->gfx.config.rb_config[i][j].raster_config =
3636                                 RREG32(mmPA_SC_RASTER_CONFIG);
3637                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3638                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3639                 }
3640         }
3641         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3642         mutex_unlock(&adev->grbm_idx_mutex);
3643 }
3644
3645 #define DEFAULT_SH_MEM_BASES    (0x6000)
3646 /**
3647  * gfx_v8_0_init_compute_vmid - gart enable
3648  *
3649  * @adev: amdgpu_device pointer
3650  *
3651  * Initialize compute vmid sh_mem registers
3652  *
3653  */
3654 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3655 {
3656         int i;
3657         uint32_t sh_mem_config;
3658         uint32_t sh_mem_bases;
3659
3660         /*
3661          * Configure apertures:
3662          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3663          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3664          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3665          */
3666         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3667
3668         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3669                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3670                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3671                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3672                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3673                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3674
3675         mutex_lock(&adev->srbm_mutex);
3676         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3677                 vi_srbm_select(adev, 0, 0, 0, i);
3678                 /* CP and shaders */
3679                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3680                 WREG32(mmSH_MEM_APE1_BASE, 1);
3681                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3682                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3683         }
3684         vi_srbm_select(adev, 0, 0, 0, 0);
3685         mutex_unlock(&adev->srbm_mutex);
3686
3687         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3688            access. These should be enabled by FW for target VMIDs. */
3689         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3690                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3691                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3692                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3693                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3694         }
3695 }
3696
3697 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3698 {
3699         int vmid;
3700
3701         /*
3702          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3703          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3704          * the driver can enable them for graphics. VMID0 should maintain
3705          * access so that HWS firmware can save/restore entries.
3706          */
3707         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3708                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3709                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3710                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3711                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3712         }
3713 }
3714
3715 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3716 {
3717         switch (adev->asic_type) {
3718         default:
3719                 adev->gfx.config.double_offchip_lds_buf = 1;
3720                 break;
3721         case CHIP_CARRIZO:
3722         case CHIP_STONEY:
3723                 adev->gfx.config.double_offchip_lds_buf = 0;
3724                 break;
3725         }
3726 }
3727
3728 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3729 {
3730         u32 tmp, sh_static_mem_cfg;
3731         int i;
3732
3733         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3734         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3735         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3736         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3737
3738         gfx_v8_0_tiling_mode_table_init(adev);
3739         gfx_v8_0_setup_rb(adev);
3740         gfx_v8_0_get_cu_info(adev);
3741         gfx_v8_0_config_init(adev);
3742
3743         /* XXX SH_MEM regs */
3744         /* where to put LDS, scratch, GPUVM in FSA64 space */
3745         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3746                                    SWIZZLE_ENABLE, 1);
3747         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3748                                    ELEMENT_SIZE, 1);
3749         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3750                                    INDEX_STRIDE, 3);
3751         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3752
3753         mutex_lock(&adev->srbm_mutex);
3754         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3755                 vi_srbm_select(adev, 0, 0, 0, i);
3756                 /* CP and shaders */
3757                 if (i == 0) {
3758                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3759                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3760                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3761                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3762                         WREG32(mmSH_MEM_CONFIG, tmp);
3763                         WREG32(mmSH_MEM_BASES, 0);
3764                 } else {
3765                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3766                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3767                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3768                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3769                         WREG32(mmSH_MEM_CONFIG, tmp);
3770                         tmp = adev->gmc.shared_aperture_start >> 48;
3771                         WREG32(mmSH_MEM_BASES, tmp);
3772                 }
3773
3774                 WREG32(mmSH_MEM_APE1_BASE, 1);
3775                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3776         }
3777         vi_srbm_select(adev, 0, 0, 0, 0);
3778         mutex_unlock(&adev->srbm_mutex);
3779
3780         gfx_v8_0_init_compute_vmid(adev);
3781         gfx_v8_0_init_gds_vmid(adev);
3782
3783         mutex_lock(&adev->grbm_idx_mutex);
3784         /*
3785          * making sure that the following register writes will be broadcasted
3786          * to all the shaders
3787          */
3788         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3789
3790         WREG32(mmPA_SC_FIFO_SIZE,
3791                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3792                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3793                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3794                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3795                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3796                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3797                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3798                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3799
3800         tmp = RREG32(mmSPI_ARB_PRIORITY);
3801         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3802         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3803         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3804         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3805         WREG32(mmSPI_ARB_PRIORITY, tmp);
3806
3807         mutex_unlock(&adev->grbm_idx_mutex);
3808
3809 }
3810
3811 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3812 {
3813         u32 i, j, k;
3814         u32 mask;
3815
3816         mutex_lock(&adev->grbm_idx_mutex);
3817         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3818                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3819                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3820                         for (k = 0; k < adev->usec_timeout; k++) {
3821                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3822                                         break;
3823                                 udelay(1);
3824                         }
3825                         if (k == adev->usec_timeout) {
3826                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3827                                                       0xffffffff, 0xffffffff, 0);
3828                                 mutex_unlock(&adev->grbm_idx_mutex);
3829                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3830                                          i, j);
3831                                 return;
3832                         }
3833                 }
3834         }
3835         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3836         mutex_unlock(&adev->grbm_idx_mutex);
3837
3838         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3839                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3840                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3841                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3842         for (k = 0; k < adev->usec_timeout; k++) {
3843                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3844                         break;
3845                 udelay(1);
3846         }
3847 }
3848
3849 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3850                                                bool enable)
3851 {
3852         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3853
3854         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3855         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3856         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3857         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3858
3859         WREG32(mmCP_INT_CNTL_RING0, tmp);
3860 }
3861
3862 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3863 {
3864         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3865         /* csib */
3866         WREG32(mmRLC_CSIB_ADDR_HI,
3867                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3868         WREG32(mmRLC_CSIB_ADDR_LO,
3869                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3870         WREG32(mmRLC_CSIB_LENGTH,
3871                         adev->gfx.rlc.clear_state_size);
3872 }
3873
/*
 * Walk the indirect part of an RLC register list format array, in place.
 *
 * @register_list_format: list to parse; index words are rewritten (see below)
 * @ind_offset:           position in the list at which parsing starts
 * @list_size:            number of words in @register_list_format
 * @unique_indices:       out: distinct index values found, in first-seen order
 * @indices_count:        in/out: number of valid entries in @unique_indices
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    out: list position at which each entry begins
 * @offset_count:         in/out: number of valid entries in @ind_start_offsets
 * @max_offset:           capacity of @ind_start_offsets
 *
 * A word equal to 0xFFFFFFFF ends the current entry; the next word starts a
 * new one and its position is recorded in @ind_start_offsets.  For any other
 * word the parser skips two words ahead, treats the word found there as an
 * index value, records it in @unique_indices if it has not been seen yet,
 * and replaces it in the list with its position in @unique_indices.
 *
 * Exceeding either capacity triggers BUG_ON().  A list that ends in the
 * middle of a triplet triggers a warning and stops parsing instead of
 * accessing memory past @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * The index word is both read and written below, so a
		 * truncated list must not be allowed to push us out of bounds.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table of unique values */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
3923
3924 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3925 {
3926         int i, temp, data;
3927         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3928         int indices_count = 0;
3929         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3930         int offset_count = 0;
3931
3932         int list_size;
3933         unsigned int *register_list_format =
3934                 kmemdup(adev->gfx.rlc.register_list_format,
3935                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3936         if (!register_list_format)
3937                 return -ENOMEM;
3938
3939         gfx_v8_0_parse_ind_reg_list(register_list_format,
3940                                 RLC_FormatDirectRegListLength,
3941                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3942                                 unique_indices,
3943                                 &indices_count,
3944                                 ARRAY_SIZE(unique_indices),
3945                                 indirect_start_offsets,
3946                                 &offset_count,
3947                                 ARRAY_SIZE(indirect_start_offsets));
3948
3949         /* save and restore list */
3950         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3951
3952         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3953         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3954                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3955
3956         /* indirect list */
3957         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3958         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3959                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3960
3961         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3962         list_size = list_size >> 1;
3963         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3964         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3965
3966         /* starting offsets starts */
3967         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3968                 adev->gfx.rlc.starting_offsets_start);
3969         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3970                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3971                                 indirect_start_offsets[i]);
3972
3973         /* unique indices */
3974         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3975         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3976         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3977                 if (unique_indices[i] != 0) {
3978                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3979                         WREG32(data + i, unique_indices[i] >> 20);
3980                 }
3981         }
3982         kfree(register_list_format);
3983
3984         return 0;
3985 }
3986
3987 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3988 {
3989         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3990 }
3991
3992 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3993 {
3994         uint32_t data;
3995
3996         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3997
3998         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3999         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4000         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4001         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4002         WREG32(mmRLC_PG_DELAY, data);
4003
4004         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4005         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4006
4007 }
4008
4009 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4010                                                 bool enable)
4011 {
4012         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4013 }
4014
4015 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4016                                                   bool enable)
4017 {
4018         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4019 }
4020
4021 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4022 {
4023         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4024 }
4025
4026 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4027 {
4028         if ((adev->asic_type == CHIP_CARRIZO) ||
4029             (adev->asic_type == CHIP_STONEY)) {
4030                 gfx_v8_0_init_csb(adev);
4031                 gfx_v8_0_init_save_restore_list(adev);
4032                 gfx_v8_0_enable_save_restore_machine(adev);
4033                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4034                 gfx_v8_0_init_power_gating(adev);
4035                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4036         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4037                    (adev->asic_type == CHIP_POLARIS12) ||
4038                    (adev->asic_type == CHIP_VEGAM)) {
4039                 gfx_v8_0_init_csb(adev);
4040                 gfx_v8_0_init_save_restore_list(adev);
4041                 gfx_v8_0_enable_save_restore_machine(adev);
4042                 gfx_v8_0_init_power_gating(adev);
4043         }
4044
4045 }
4046
4047 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4048 {
4049         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4050
4051         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4052         gfx_v8_0_wait_for_rlc_serdes(adev);
4053 }
4054
4055 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4056 {
4057         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4058         udelay(50);
4059
4060         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4061         udelay(50);
4062 }
4063
4064 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4065 {
4066         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4067
4068         /* carrizo do enable cp interrupt after cp inited */
4069         if (!(adev->flags & AMD_IS_APU))
4070                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4071
4072         udelay(50);
4073 }
4074
4075 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4076 {
4077         if (amdgpu_sriov_vf(adev)) {
4078                 gfx_v8_0_init_csb(adev);
4079                 return 0;
4080         }
4081
4082         adev->gfx.rlc.funcs->stop(adev);
4083         adev->gfx.rlc.funcs->reset(adev);
4084         gfx_v8_0_init_pg(adev);
4085         adev->gfx.rlc.funcs->start(adev);
4086
4087         return 0;
4088 }
4089
4090 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4091 {
4092         u32 tmp = RREG32(mmCP_ME_CNTL);
4093
4094         if (enable) {
4095                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4096                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4097                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4098         } else {
4099                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4100                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4101                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4102         }
4103         WREG32(mmCP_ME_CNTL, tmp);
4104         udelay(50);
4105 }
4106
4107 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4108 {
4109         u32 count = 0;
4110         const struct cs_section_def *sect = NULL;
4111         const struct cs_extent_def *ext = NULL;
4112
4113         /* begin clear state */
4114         count += 2;
4115         /* context control state */
4116         count += 3;
4117
4118         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4119                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4120                         if (sect->id == SECT_CONTEXT)
4121                                 count += 2 + ext->reg_count;
4122                         else
4123                                 return 0;
4124                 }
4125         }
4126         /* pa_sc_raster_config/pa_sc_raster_config1 */
4127         count += 4;
4128         /* end clear state */
4129         count += 2;
4130         /* clear state */
4131         count += 2;
4132
4133         return count;
4134 }
4135
4136 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4137 {
4138         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4139         const struct cs_section_def *sect = NULL;
4140         const struct cs_extent_def *ext = NULL;
4141         int r, i;
4142
4143         /* init the CP */
4144         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4145         WREG32(mmCP_ENDIAN_SWAP, 0);
4146         WREG32(mmCP_DEVICE_ID, 1);
4147
4148         gfx_v8_0_cp_gfx_enable(adev, true);
4149
4150         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4151         if (r) {
4152                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4153                 return r;
4154         }
4155
4156         /* clear state buffer */
4157         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4158         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4159
4160         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4161         amdgpu_ring_write(ring, 0x80000000);
4162         amdgpu_ring_write(ring, 0x80000000);
4163
4164         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4165                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4166                         if (sect->id == SECT_CONTEXT) {
4167                                 amdgpu_ring_write(ring,
4168                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4169                                                ext->reg_count));
4170                                 amdgpu_ring_write(ring,
4171                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4172                                 for (i = 0; i < ext->reg_count; i++)
4173                                         amdgpu_ring_write(ring, ext->extent[i]);
4174                         }
4175                 }
4176         }
4177
4178         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4179         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4180         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4181         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4182
4183         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4184         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4185
4186         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4187         amdgpu_ring_write(ring, 0);
4188
4189         /* init the CE partitions */
4190         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4191         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4192         amdgpu_ring_write(ring, 0x8000);
4193         amdgpu_ring_write(ring, 0x8000);
4194
4195         amdgpu_ring_commit(ring);
4196
4197         return 0;
4198 }
4199 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4200 {
4201         u32 tmp;
4202         /* no gfx doorbells on iceland */
4203         if (adev->asic_type == CHIP_TOPAZ)
4204                 return;
4205
4206         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4207
4208         if (ring->use_doorbell) {
4209                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4210                                 DOORBELL_OFFSET, ring->doorbell_index);
4211                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4212                                                 DOORBELL_HIT, 0);
4213                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4214                                             DOORBELL_EN, 1);
4215         } else {
4216                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4217         }
4218
4219         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4220
4221         if (adev->flags & AMD_IS_APU)
4222                 return;
4223
4224         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4225                                         DOORBELL_RANGE_LOWER,
4226                                         adev->doorbell_index.gfx_ring0);
4227         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4228
4229         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4230                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4231 }
4232
4233 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4234 {
4235         struct amdgpu_ring *ring;
4236         u32 tmp;
4237         u32 rb_bufsz;
4238         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4239
4240         /* Set the write pointer delay */
4241         WREG32(mmCP_RB_WPTR_DELAY, 0);
4242
4243         /* set the RB to use vmid 0 */
4244         WREG32(mmCP_RB_VMID, 0);
4245
4246         /* Set ring buffer size */
4247         ring = &adev->gfx.gfx_ring[0];
4248         rb_bufsz = order_base_2(ring->ring_size / 8);
4249         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4250         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4251         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4252         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4253 #ifdef __BIG_ENDIAN
4254         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4255 #endif
4256         WREG32(mmCP_RB0_CNTL, tmp);
4257
4258         /* Initialize the ring buffer's read and write pointers */
4259         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4260         ring->wptr = 0;
4261         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4262
4263         /* set the wb address whether it's enabled or not */
4264         rptr_addr = ring->rptr_gpu_addr;
4265         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4266         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4267
4268         wptr_gpu_addr = ring->wptr_gpu_addr;
4269         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4270         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4271         mdelay(1);
4272         WREG32(mmCP_RB0_CNTL, tmp);
4273
4274         rb_addr = ring->gpu_addr >> 8;
4275         WREG32(mmCP_RB0_BASE, rb_addr);
4276         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4277
4278         gfx_v8_0_set_cpg_door_bell(adev, ring);
4279         /* start the ring */
4280         amdgpu_ring_clear_ring(ring);
4281         gfx_v8_0_cp_gfx_start(adev);
4282
4283         return 0;
4284 }
4285
4286 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4287 {
4288         if (enable) {
4289                 WREG32(mmCP_MEC_CNTL, 0);
4290         } else {
4291                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4292                 adev->gfx.kiq[0].ring.sched.ready = false;
4293         }
4294         udelay(50);
4295 }
4296
4297 /* KIQ functions */
4298 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4299 {
4300         uint32_t tmp;
4301         struct amdgpu_device *adev = ring->adev;
4302
4303         /* tell RLC which is KIQ queue */
4304         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4305         tmp &= 0xffffff00;
4306         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4307         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4308         tmp |= 0x80;
4309         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4310 }
4311
4312 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4313 {
4314         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4315         uint64_t queue_mask = 0;
4316         int r, i;
4317
4318         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4319                 if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4320                         continue;
4321
4322                 /* This situation may be hit in the future if a new HW
4323                  * generation exposes more than 64 queues. If so, the
4324                  * definition of queue_mask needs updating */
4325                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4326                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4327                         break;
4328                 }
4329
4330                 queue_mask |= (1ull << i);
4331         }
4332
4333         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4334         if (r) {
4335                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4336                 return r;
4337         }
4338         /* set resources */
4339         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4340         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4341         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4342         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4343         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4344         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4345         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4346         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4347         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4348                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4349                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4350                 uint64_t wptr_addr = ring->wptr_gpu_addr;
4351
4352                 /* map queues */
4353                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4354                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4355                 amdgpu_ring_write(kiq_ring,
4356                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4357                 amdgpu_ring_write(kiq_ring,
4358                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4359                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4360                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4361                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4362                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4363                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4364                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4365                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4366         }
4367
4368         amdgpu_ring_commit(kiq_ring);
4369
4370         return 0;
4371 }
4372
4373 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4374 {
4375         int i, r = 0;
4376
4377         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4378                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4379                 for (i = 0; i < adev->usec_timeout; i++) {
4380                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4381                                 break;
4382                         udelay(1);
4383                 }
4384                 if (i == adev->usec_timeout)
4385                         r = -ETIMEDOUT;
4386         }
4387         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4388         WREG32(mmCP_HQD_PQ_RPTR, 0);
4389         WREG32(mmCP_HQD_PQ_WPTR, 0);
4390
4391         return r;
4392 }
4393
4394 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4395 {
4396         struct amdgpu_device *adev = ring->adev;
4397
4398         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4399                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4400                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4401                         mqd->cp_hqd_queue_priority =
4402                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4403                 }
4404         }
4405 }
4406
4407 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4408 {
4409         struct amdgpu_device *adev = ring->adev;
4410         struct vi_mqd *mqd = ring->mqd_ptr;
4411         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4412         uint32_t tmp;
4413
4414         mqd->header = 0xC0310800;
4415         mqd->compute_pipelinestat_enable = 0x00000001;
4416         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4417         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4418         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4419         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4420         mqd->compute_misc_reserved = 0x00000003;
4421         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4422                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4423         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4424                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4425         eop_base_addr = ring->eop_gpu_addr >> 8;
4426         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4427         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4428
4429         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4430         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4431         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4432                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4433
4434         mqd->cp_hqd_eop_control = tmp;
4435
4436         /* enable doorbell? */
4437         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4438                             CP_HQD_PQ_DOORBELL_CONTROL,
4439                             DOORBELL_EN,
4440                             ring->use_doorbell ? 1 : 0);
4441
4442         mqd->cp_hqd_pq_doorbell_control = tmp;
4443
4444         /* set the pointer to the MQD */
4445         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4446         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4447
4448         /* set MQD vmid to 0 */
4449         tmp = RREG32(mmCP_MQD_CONTROL);
4450         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4451         mqd->cp_mqd_control = tmp;
4452
4453         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4454         hqd_gpu_addr = ring->gpu_addr >> 8;
4455         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4456         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4457
4458         /* set up the HQD, this is similar to CP_RB0_CNTL */
4459         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4460         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4461                             (order_base_2(ring->ring_size / 4) - 1));
4462         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4463                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4464 #ifdef __BIG_ENDIAN
4465         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4466 #endif
4467         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4468         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4469         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4470         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4471         mqd->cp_hqd_pq_control = tmp;
4472
4473         /* set the wb address whether it's enabled or not */
4474         wb_gpu_addr = ring->rptr_gpu_addr;
4475         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4476         mqd->cp_hqd_pq_rptr_report_addr_hi =
4477                 upper_32_bits(wb_gpu_addr) & 0xffff;
4478
4479         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4480         wb_gpu_addr = ring->wptr_gpu_addr;
4481         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4482         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4483
4484         tmp = 0;
4485         /* enable the doorbell if requested */
4486         if (ring->use_doorbell) {
4487                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4488                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4489                                 DOORBELL_OFFSET, ring->doorbell_index);
4490
4491                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4492                                          DOORBELL_EN, 1);
4493                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4494                                          DOORBELL_SOURCE, 0);
4495                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4496                                          DOORBELL_HIT, 0);
4497         }
4498
4499         mqd->cp_hqd_pq_doorbell_control = tmp;
4500
4501         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4502         ring->wptr = 0;
4503         mqd->cp_hqd_pq_wptr = ring->wptr;
4504         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4505
4506         /* set the vmid for the queue */
4507         mqd->cp_hqd_vmid = 0;
4508
4509         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4510         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4511         mqd->cp_hqd_persistent_state = tmp;
4512
4513         /* set MTYPE */
4514         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4515         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4516         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4517         mqd->cp_hqd_ib_control = tmp;
4518
4519         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4520         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4521         mqd->cp_hqd_iq_timer = tmp;
4522
4523         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4524         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4525         mqd->cp_hqd_ctx_save_control = tmp;
4526
4527         /* defaults */
4528         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4529         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4530         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4531         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4532         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4533         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4534         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4535         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4536         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4537         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4538         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4539         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4540
4541         /* set static priority for a queue/ring */
4542         gfx_v8_0_mqd_set_priority(ring, mqd);
4543         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4544
4545         /* map_queues packet doesn't need activate the queue,
4546          * so only kiq need set this field.
4547          */
4548         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4549                 mqd->cp_hqd_active = 1;
4550
4551         return 0;
4552 }
4553
4554 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4555                         struct vi_mqd *mqd)
4556 {
4557         uint32_t mqd_reg;
4558         uint32_t *mqd_data;
4559
4560         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4561         mqd_data = &mqd->cp_mqd_base_addr_lo;
4562
4563         /* disable wptr polling */
4564         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4565
4566         /* program all HQD registers */
4567         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4568                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4569
4570         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4571          * This is safe since EOP RPTR==WPTR for any inactive HQD
4572          * on ASICs that do not support context-save.
4573          * EOP writes/reads can start anywhere in the ring.
4574          */
4575         if (adev->asic_type != CHIP_TONGA) {
4576                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4577                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4578                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4579         }
4580
4581         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4582                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4583
4584         /* activate the HQD */
4585         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4586                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4587
4588         return 0;
4589 }
4590
4591 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4592 {
4593         struct amdgpu_device *adev = ring->adev;
4594         struct vi_mqd *mqd = ring->mqd_ptr;
4595
4596         gfx_v8_0_kiq_setting(ring);
4597
4598         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4599                 /* reset MQD to a clean status */
4600                 if (adev->gfx.kiq[0].mqd_backup)
4601                         memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4602
4603                 /* reset ring buffer */
4604                 ring->wptr = 0;
4605                 amdgpu_ring_clear_ring(ring);
4606                 mutex_lock(&adev->srbm_mutex);
4607                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4608                 gfx_v8_0_mqd_commit(adev, mqd);
4609                 vi_srbm_select(adev, 0, 0, 0, 0);
4610                 mutex_unlock(&adev->srbm_mutex);
4611         } else {
4612                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4613                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4614                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4615                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4616                         amdgpu_ring_clear_ring(ring);
4617                 mutex_lock(&adev->srbm_mutex);
4618                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4619                 gfx_v8_0_mqd_init(ring);
4620                 gfx_v8_0_mqd_commit(adev, mqd);
4621                 vi_srbm_select(adev, 0, 0, 0, 0);
4622                 mutex_unlock(&adev->srbm_mutex);
4623
4624                 if (adev->gfx.kiq[0].mqd_backup)
4625                         memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4626         }
4627
4628         return 0;
4629 }
4630
4631 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4632 {
4633         struct amdgpu_device *adev = ring->adev;
4634         struct vi_mqd *mqd = ring->mqd_ptr;
4635         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4636
4637         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4638                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4639                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4640                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4641                 mutex_lock(&adev->srbm_mutex);
4642                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4643                 gfx_v8_0_mqd_init(ring);
4644                 vi_srbm_select(adev, 0, 0, 0, 0);
4645                 mutex_unlock(&adev->srbm_mutex);
4646
4647                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4648                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4649         } else {
4650                 /* restore MQD to a clean status */
4651                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4652                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4653                 /* reset ring buffer */
4654                 ring->wptr = 0;
4655                 amdgpu_ring_clear_ring(ring);
4656         }
4657         return 0;
4658 }
4659
4660 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4661 {
4662         if (adev->asic_type > CHIP_TONGA) {
4663                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4664                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4665         }
4666         /* enable doorbells */
4667         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4668 }
4669
4670 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4671 {
4672         struct amdgpu_ring *ring;
4673         int r;
4674
4675         ring = &adev->gfx.kiq[0].ring;
4676
4677         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4678         if (unlikely(r != 0))
4679                 return r;
4680
4681         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4682         if (unlikely(r != 0)) {
4683                 amdgpu_bo_unreserve(ring->mqd_obj);
4684                 return r;
4685         }
4686
4687         gfx_v8_0_kiq_init_queue(ring);
4688         amdgpu_bo_kunmap(ring->mqd_obj);
4689         ring->mqd_ptr = NULL;
4690         amdgpu_bo_unreserve(ring->mqd_obj);
4691         return 0;
4692 }
4693
4694 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4695 {
4696         struct amdgpu_ring *ring = NULL;
4697         int r = 0, i;
4698
4699         gfx_v8_0_cp_compute_enable(adev, true);
4700
4701         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4702                 ring = &adev->gfx.compute_ring[i];
4703
4704                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4705                 if (unlikely(r != 0))
4706                         goto done;
4707                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4708                 if (!r) {
4709                         r = gfx_v8_0_kcq_init_queue(ring);
4710                         amdgpu_bo_kunmap(ring->mqd_obj);
4711                         ring->mqd_ptr = NULL;
4712                 }
4713                 amdgpu_bo_unreserve(ring->mqd_obj);
4714                 if (r)
4715                         goto done;
4716         }
4717
4718         gfx_v8_0_set_mec_doorbell_range(adev);
4719
4720         r = gfx_v8_0_kiq_kcq_enable(adev);
4721         if (r)
4722                 goto done;
4723
4724 done:
4725         return r;
4726 }
4727
4728 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4729 {
4730         int r, i;
4731         struct amdgpu_ring *ring;
4732
4733         /* collect all the ring_tests here, gfx, kiq, compute */
4734         ring = &adev->gfx.gfx_ring[0];
4735         r = amdgpu_ring_test_helper(ring);
4736         if (r)
4737                 return r;
4738
4739         ring = &adev->gfx.kiq[0].ring;
4740         r = amdgpu_ring_test_helper(ring);
4741         if (r)
4742                 return r;
4743
4744         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4745                 ring = &adev->gfx.compute_ring[i];
4746                 amdgpu_ring_test_helper(ring);
4747         }
4748
4749         return 0;
4750 }
4751
4752 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4753 {
4754         int r;
4755
4756         if (!(adev->flags & AMD_IS_APU))
4757                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4758
4759         r = gfx_v8_0_kiq_resume(adev);
4760         if (r)
4761                 return r;
4762
4763         r = gfx_v8_0_cp_gfx_resume(adev);
4764         if (r)
4765                 return r;
4766
4767         r = gfx_v8_0_kcq_resume(adev);
4768         if (r)
4769                 return r;
4770
4771         r = gfx_v8_0_cp_test_all_rings(adev);
4772         if (r)
4773                 return r;
4774
4775         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4776
4777         return 0;
4778 }
4779
4780 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4781 {
4782         gfx_v8_0_cp_gfx_enable(adev, enable);
4783         gfx_v8_0_cp_compute_enable(adev, enable);
4784 }
4785
4786 static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
4787 {
4788         int r;
4789         struct amdgpu_device *adev = ip_block->adev;
4790
4791         gfx_v8_0_init_golden_registers(adev);
4792         gfx_v8_0_constants_init(adev);
4793
4794         r = adev->gfx.rlc.funcs->resume(adev);
4795         if (r)
4796                 return r;
4797
4798         r = gfx_v8_0_cp_resume(adev);
4799
4800         return r;
4801 }
4802
4803 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4804 {
4805         int r, i;
4806         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4807
4808         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4809         if (r)
4810                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4811
4812         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4813                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4814
4815                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4816                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4817                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4818                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4819                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4820                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4821                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4822                 amdgpu_ring_write(kiq_ring, 0);
4823                 amdgpu_ring_write(kiq_ring, 0);
4824                 amdgpu_ring_write(kiq_ring, 0);
4825         }
4826         /* Submit unmap queue packet */
4827         amdgpu_ring_commit(kiq_ring);
4828         /*
4829          * Ring test will do a basic scratch register change check. Just run
4830          * this to ensure that unmap queues that is submitted before got
4831          * processed successfully before returning.
4832          */
4833         r = amdgpu_ring_test_helper(kiq_ring);
4834         if (r)
4835                 DRM_ERROR("KCQ disable failed\n");
4836
4837         return r;
4838 }
4839
4840 static bool gfx_v8_0_is_idle(void *handle)
4841 {
4842         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4843
4844         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4845                 || RREG32(mmGRBM_STATUS2) != 0x8)
4846                 return false;
4847         else
4848                 return true;
4849 }
4850
4851 static bool gfx_v8_0_rlc_is_idle(void *handle)
4852 {
4853         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4854
4855         if (RREG32(mmGRBM_STATUS2) != 0x8)
4856                 return false;
4857         else
4858                 return true;
4859 }
4860
4861 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4862 {
4863         unsigned int i;
4864         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4865
4866         for (i = 0; i < adev->usec_timeout; i++) {
4867                 if (gfx_v8_0_rlc_is_idle(handle))
4868                         return 0;
4869
4870                 udelay(1);
4871         }
4872         return -ETIMEDOUT;
4873 }
4874
4875 static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4876 {
4877         unsigned int i;
4878         struct amdgpu_device *adev = ip_block->adev;
4879
4880         for (i = 0; i < adev->usec_timeout; i++) {
4881                 if (gfx_v8_0_is_idle(adev))
4882                         return 0;
4883
4884                 udelay(1);
4885         }
4886         return -ETIMEDOUT;
4887 }
4888
4889 static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
4890 {
4891         struct amdgpu_device *adev = ip_block->adev;
4892
4893         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4894         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4895
4896         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4897
4898         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4899
4900         /* disable KCQ to avoid CPC touch memory not valid anymore */
4901         gfx_v8_0_kcq_disable(adev);
4902
4903         if (amdgpu_sriov_vf(adev)) {
4904                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4905                 return 0;
4906         }
4907
4908         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4909         if (!gfx_v8_0_wait_for_idle(ip_block))
4910                 gfx_v8_0_cp_enable(adev, false);
4911         else
4912                 pr_err("cp is busy, skip halt cp\n");
4913         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4914                 adev->gfx.rlc.funcs->stop(adev);
4915         else
4916                 pr_err("rlc is busy, skip halt rlc\n");
4917         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4918
4919         return 0;
4920 }
4921
/* IP block suspend hook: identical to hw_fini for this block. */
static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v8_0_hw_fini(ip_block);

	return ret;
}
4926
/* IP block resume hook: identical to hw_init for this block. */
static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v8_0_hw_init(ip_block);

	return ret;
}
4931
4932 static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4933 {
4934         struct amdgpu_device *adev = ip_block->adev;
4935         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4936         u32 tmp;
4937
4938         /* GRBM_STATUS */
4939         tmp = RREG32(mmGRBM_STATUS);
4940         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4941                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4942                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4943                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4944                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4945                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4946                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4947                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4948                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4949                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4950                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4951                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4952                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4953         }
4954
4955         /* GRBM_STATUS2 */
4956         tmp = RREG32(mmGRBM_STATUS2);
4957         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4958                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4959                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4960
4961         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4962             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4963             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4964                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4965                                                 SOFT_RESET_CPF, 1);
4966                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4967                                                 SOFT_RESET_CPC, 1);
4968                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4969                                                 SOFT_RESET_CPG, 1);
4970                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4971                                                 SOFT_RESET_GRBM, 1);
4972         }
4973
4974         /* SRBM_STATUS */
4975         tmp = RREG32(mmSRBM_STATUS);
4976         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4977                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4978                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4979         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4980                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4981                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4982
4983         if (grbm_soft_reset || srbm_soft_reset) {
4984                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4985                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4986                 return true;
4987         } else {
4988                 adev->gfx.grbm_soft_reset = 0;
4989                 adev->gfx.srbm_soft_reset = 0;
4990                 return false;
4991         }
4992 }
4993
4994 static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
4995 {
4996         struct amdgpu_device *adev = ip_block->adev;
4997         u32 grbm_soft_reset = 0;
4998
4999         if ((!adev->gfx.grbm_soft_reset) &&
5000             (!adev->gfx.srbm_soft_reset))
5001                 return 0;
5002
5003         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5004
5005         /* stop the rlc */
5006         adev->gfx.rlc.funcs->stop(adev);
5007
5008         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5009             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5010                 /* Disable GFX parsing/prefetching */
5011                 gfx_v8_0_cp_gfx_enable(adev, false);
5012
5013         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5014             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5015             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5016             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5017                 int i;
5018
5019                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5020                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5021
5022                         mutex_lock(&adev->srbm_mutex);
5023                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5024                         gfx_v8_0_deactivate_hqd(adev, 2);
5025                         vi_srbm_select(adev, 0, 0, 0, 0);
5026                         mutex_unlock(&adev->srbm_mutex);
5027                 }
5028                 /* Disable MEC parsing/prefetching */
5029                 gfx_v8_0_cp_compute_enable(adev, false);
5030         }
5031
5032         return 0;
5033 }
5034
5035 static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
5036 {
5037         struct amdgpu_device *adev = ip_block->adev;
5038         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5039         u32 tmp;
5040
5041         if ((!adev->gfx.grbm_soft_reset) &&
5042             (!adev->gfx.srbm_soft_reset))
5043                 return 0;
5044
5045         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5046         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5047
5048         if (grbm_soft_reset || srbm_soft_reset) {
5049                 tmp = RREG32(mmGMCON_DEBUG);
5050                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5051                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5052                 WREG32(mmGMCON_DEBUG, tmp);
5053                 udelay(50);
5054         }
5055
5056         if (grbm_soft_reset) {
5057                 tmp = RREG32(mmGRBM_SOFT_RESET);
5058                 tmp |= grbm_soft_reset;
5059                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5060                 WREG32(mmGRBM_SOFT_RESET, tmp);
5061                 tmp = RREG32(mmGRBM_SOFT_RESET);
5062
5063                 udelay(50);
5064
5065                 tmp &= ~grbm_soft_reset;
5066                 WREG32(mmGRBM_SOFT_RESET, tmp);
5067                 tmp = RREG32(mmGRBM_SOFT_RESET);
5068         }
5069
5070         if (srbm_soft_reset) {
5071                 tmp = RREG32(mmSRBM_SOFT_RESET);
5072                 tmp |= srbm_soft_reset;
5073                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5074                 WREG32(mmSRBM_SOFT_RESET, tmp);
5075                 tmp = RREG32(mmSRBM_SOFT_RESET);
5076
5077                 udelay(50);
5078
5079                 tmp &= ~srbm_soft_reset;
5080                 WREG32(mmSRBM_SOFT_RESET, tmp);
5081                 tmp = RREG32(mmSRBM_SOFT_RESET);
5082         }
5083
5084         if (grbm_soft_reset || srbm_soft_reset) {
5085                 tmp = RREG32(mmGMCON_DEBUG);
5086                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5087                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5088                 WREG32(mmGMCON_DEBUG, tmp);
5089         }
5090
5091         /* Wait a little for things to settle down */
5092         udelay(50);
5093
5094         return 0;
5095 }
5096
5097 static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5098 {
5099         struct amdgpu_device *adev = ip_block->adev;
5100         u32 grbm_soft_reset = 0;
5101
5102         if ((!adev->gfx.grbm_soft_reset) &&
5103             (!adev->gfx.srbm_soft_reset))
5104                 return 0;
5105
5106         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5107
5108         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5109             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5110             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5111             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5112                 int i;
5113
5114                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5115                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5116
5117                         mutex_lock(&adev->srbm_mutex);
5118                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5119                         gfx_v8_0_deactivate_hqd(adev, 2);
5120                         vi_srbm_select(adev, 0, 0, 0, 0);
5121                         mutex_unlock(&adev->srbm_mutex);
5122                 }
5123                 gfx_v8_0_kiq_resume(adev);
5124                 gfx_v8_0_kcq_resume(adev);
5125         }
5126
5127         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5128             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5129                 gfx_v8_0_cp_gfx_resume(adev);
5130
5131         gfx_v8_0_cp_test_all_rings(adev);
5132
5133         adev->gfx.rlc.funcs->start(adev);
5134
5135         return 0;
5136 }
5137
5138 /**
5139  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5140  *
5141  * @adev: amdgpu_device pointer
5142  *
5143  * Fetches a GPU clock counter snapshot.
5144  * Returns the 64 bit clock counter snapshot.
5145  */
5146 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5147 {
5148         uint64_t clock;
5149
5150         mutex_lock(&adev->gfx.gpu_clock_mutex);
5151         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5152         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5153                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5154         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5155         return clock;
5156 }
5157
5158 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5159                                           uint32_t vmid,
5160                                           uint32_t gds_base, uint32_t gds_size,
5161                                           uint32_t gws_base, uint32_t gws_size,
5162                                           uint32_t oa_base, uint32_t oa_size)
5163 {
5164         /* GDS Base */
5165         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5166         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5167                                 WRITE_DATA_DST_SEL(0)));
5168         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5169         amdgpu_ring_write(ring, 0);
5170         amdgpu_ring_write(ring, gds_base);
5171
5172         /* GDS Size */
5173         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5174         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5175                                 WRITE_DATA_DST_SEL(0)));
5176         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5177         amdgpu_ring_write(ring, 0);
5178         amdgpu_ring_write(ring, gds_size);
5179
5180         /* GWS */
5181         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5182         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5183                                 WRITE_DATA_DST_SEL(0)));
5184         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5185         amdgpu_ring_write(ring, 0);
5186         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5187
5188         /* OA */
5189         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191                                 WRITE_DATA_DST_SEL(0)));
5192         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5193         amdgpu_ring_write(ring, 0);
5194         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5195 }
5196
5197 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5198 {
5199         WREG32(mmSQ_IND_INDEX,
5200                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5201                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5202                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5203                 (SQ_IND_INDEX__FORCE_READ_MASK));
5204         return RREG32(mmSQ_IND_DATA);
5205 }
5206
5207 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5208                            uint32_t wave, uint32_t thread,
5209                            uint32_t regno, uint32_t num, uint32_t *out)
5210 {
5211         WREG32(mmSQ_IND_INDEX,
5212                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5213                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5214                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5215                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5216                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5217                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5218         while (num--)
5219                 *(out++) = RREG32(mmSQ_IND_DATA);
5220 }
5221
5222 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5223 {
5224         /* type 0 wave data */
5225         dst[(*no_fields)++] = 0;
5226         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5227         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5228         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5229         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5230         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5231         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5232         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5233         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5234         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5235         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5236         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5237         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5238         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5239         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5240         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5241         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5242         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5243         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5244         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5245 }
5246
5247 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5248                                      uint32_t wave, uint32_t start,
5249                                      uint32_t size, uint32_t *dst)
5250 {
5251         wave_read_regs(
5252                 adev, simd, wave, 0,
5253                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5254 }
5255
5256
5257 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5258         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5259         .select_se_sh = &gfx_v8_0_select_se_sh,
5260         .read_wave_data = &gfx_v8_0_read_wave_data,
5261         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5262         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5263 };
5264
5265 static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
5266 {
5267         struct amdgpu_device *adev = ip_block->adev;
5268
5269         adev->gfx.xcc_mask = 1;
5270         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5271         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5272                                           AMDGPU_MAX_COMPUTE_RINGS);
5273         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5274         gfx_v8_0_set_ring_funcs(adev);
5275         gfx_v8_0_set_irq_funcs(adev);
5276         gfx_v8_0_set_gds_init(adev);
5277         gfx_v8_0_set_rlc_funcs(adev);
5278
5279         return 0;
5280 }
5281
5282 static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
5283 {
5284         struct amdgpu_device *adev = ip_block->adev;
5285         int r;
5286
5287         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5288         if (r)
5289                 return r;
5290
5291         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5292         if (r)
5293                 return r;
5294
5295         /* requires IBs so do in late init after IB pool is initialized */
5296         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5297         if (r)
5298                 return r;
5299
5300         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5301         if (r) {
5302                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5303                 return r;
5304         }
5305
5306         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5307         if (r) {
5308                 DRM_ERROR(
5309                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5310                         r);
5311                 return r;
5312         }
5313
5314         return 0;
5315 }
5316
5317 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5318                                                        bool enable)
5319 {
5320         if ((adev->asic_type == CHIP_POLARIS11) ||
5321             (adev->asic_type == CHIP_POLARIS12) ||
5322             (adev->asic_type == CHIP_VEGAM))
5323                 /* Send msg to SMU via Powerplay */
5324                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5325
5326         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5327 }
5328
5329 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5330                                                         bool enable)
5331 {
5332         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5333 }
5334
5335 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5336                 bool enable)
5337 {
5338         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5339 }
5340
5341 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5342                                           bool enable)
5343 {
5344         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5345 }
5346
5347 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5348                                                 bool enable)
5349 {
5350         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5351
5352         /* Read any GFX register to wake up GFX. */
5353         if (!enable)
5354                 RREG32(mmDB_RENDER_CONTROL);
5355 }
5356
5357 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5358                                           bool enable)
5359 {
5360         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5361                 cz_enable_gfx_cg_power_gating(adev, true);
5362                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5363                         cz_enable_gfx_pipeline_power_gating(adev, true);
5364         } else {
5365                 cz_enable_gfx_cg_power_gating(adev, false);
5366                 cz_enable_gfx_pipeline_power_gating(adev, false);
5367         }
5368 }
5369
5370 static int gfx_v8_0_set_powergating_state(void *handle,
5371                                           enum amd_powergating_state state)
5372 {
5373         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5374         bool enable = (state == AMD_PG_STATE_GATE);
5375
5376         if (amdgpu_sriov_vf(adev))
5377                 return 0;
5378
5379         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5380                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5381                                 AMD_PG_SUPPORT_CP |
5382                                 AMD_PG_SUPPORT_GFX_DMG))
5383                 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5384         switch (adev->asic_type) {
5385         case CHIP_CARRIZO:
5386         case CHIP_STONEY:
5387
5388                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5389                         cz_enable_sck_slow_down_on_power_up(adev, true);
5390                         cz_enable_sck_slow_down_on_power_down(adev, true);
5391                 } else {
5392                         cz_enable_sck_slow_down_on_power_up(adev, false);
5393                         cz_enable_sck_slow_down_on_power_down(adev, false);
5394                 }
5395                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5396                         cz_enable_cp_power_gating(adev, true);
5397                 else
5398                         cz_enable_cp_power_gating(adev, false);
5399
5400                 cz_update_gfx_cg_power_gating(adev, enable);
5401
5402                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5403                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5404                 else
5405                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5406
5407                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5408                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5409                 else
5410                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5411                 break;
5412         case CHIP_POLARIS11:
5413         case CHIP_POLARIS12:
5414         case CHIP_VEGAM:
5415                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5416                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5417                 else
5418                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5419
5420                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5421                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5422                 else
5423                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5424
5425                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5426                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5427                 else
5428                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5429                 break;
5430         default:
5431                 break;
5432         }
5433         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5434                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5435                                 AMD_PG_SUPPORT_CP |
5436                                 AMD_PG_SUPPORT_GFX_DMG))
5437                 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5438         return 0;
5439 }
5440
5441 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5442 {
5443         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5444         int data;
5445
5446         if (amdgpu_sriov_vf(adev))
5447                 *flags = 0;
5448
5449         /* AMD_CG_SUPPORT_GFX_MGCG */
5450         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5451         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5452                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5453
5454         /* AMD_CG_SUPPORT_GFX_CGLG */
5455         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5456         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5457                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5458
5459         /* AMD_CG_SUPPORT_GFX_CGLS */
5460         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5461                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5462
5463         /* AMD_CG_SUPPORT_GFX_CGTS */
5464         data = RREG32(mmCGTS_SM_CTRL_REG);
5465         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5466                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5467
5468         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5469         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5470                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5471
5472         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5473         data = RREG32(mmRLC_MEM_SLP_CNTL);
5474         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5475                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5476
5477         /* AMD_CG_SUPPORT_GFX_CP_LS */
5478         data = RREG32(mmCP_MEM_SLP_CNTL);
5479         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5480                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5481 }
5482
5483 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5484                                      uint32_t reg_addr, uint32_t cmd)
5485 {
5486         uint32_t data;
5487
5488         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5489
5490         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5491         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5492
5493         data = RREG32(mmRLC_SERDES_WR_CTRL);
5494         if (adev->asic_type == CHIP_STONEY)
5495                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5496                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5497                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5498                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5499                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5500                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5501                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5502                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5503                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5504         else
5505                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5506                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5507                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5508                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5509                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5510                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5511                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5512                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5513                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5514                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5515                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5516         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5517                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5518                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5519                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5520
5521         WREG32(mmRLC_SERDES_WR_CTRL, data);
5522 }
5523
/* RLC safe-mode message codes */
#define MSG_EXIT_RLC_SAFE_MODE          0
#define MSG_ENTER_RLC_SAFE_MODE         1

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 1-4 */
#define RLC_GPR_REG2__REQ__SHIFT        0
#define RLC_GPR_REG2__REQ_MASK          0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT    0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK      0x0000001e
5530
5531 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5532 {
5533         uint32_t rlc_setting;
5534
5535         rlc_setting = RREG32(mmRLC_CNTL);
5536         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5537                 return false;
5538
5539         return true;
5540 }
5541
5542 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5543 {
5544         uint32_t data;
5545         unsigned i;
5546         data = RREG32(mmRLC_CNTL);
5547         data |= RLC_SAFE_MODE__CMD_MASK;
5548         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5549         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5550         WREG32(mmRLC_SAFE_MODE, data);
5551
5552         /* wait for RLC_SAFE_MODE */
5553         for (i = 0; i < adev->usec_timeout; i++) {
5554                 if ((RREG32(mmRLC_GPM_STAT) &
5555                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5556                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5557                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5558                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5559                         break;
5560                 udelay(1);
5561         }
5562         for (i = 0; i < adev->usec_timeout; i++) {
5563                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5564                         break;
5565                 udelay(1);
5566         }
5567 }
5568
5569 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5570 {
5571         uint32_t data;
5572         unsigned i;
5573
5574         data = RREG32(mmRLC_CNTL);
5575         data |= RLC_SAFE_MODE__CMD_MASK;
5576         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5577         WREG32(mmRLC_SAFE_MODE, data);
5578
5579         for (i = 0; i < adev->usec_timeout; i++) {
5580                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5581                         break;
5582                 udelay(1);
5583         }
5584 }
5585
5586 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5587 {
5588         u32 data;
5589
5590         amdgpu_gfx_off_ctrl(adev, false);
5591
5592         if (amdgpu_sriov_is_pp_one_vf(adev))
5593                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5594         else
5595                 data = RREG32(mmRLC_SPM_VMID);
5596
5597         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5598         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5599
5600         if (amdgpu_sriov_is_pp_one_vf(adev))
5601                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5602         else
5603                 WREG32(mmRLC_SPM_VMID, data);
5604
5605         amdgpu_gfx_off_ctrl(adev, true);
5606 }
5607
5608 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5609         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5610         .set_safe_mode = gfx_v8_0_set_safe_mode,
5611         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5612         .init = gfx_v8_0_rlc_init,
5613         .get_csb_size = gfx_v8_0_get_csb_size,
5614         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5615         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5616         .resume = gfx_v8_0_rlc_resume,
5617         .stop = gfx_v8_0_rlc_stop,
5618         .reset = gfx_v8_0_rlc_reset,
5619         .start = gfx_v8_0_rlc_start,
5620         .update_spm_vmid = gfx_v8_0_update_spm_vmid
5621 };
5622
5623 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5624                                                       bool enable)
5625 {
5626         uint32_t temp, data;
5627
5628         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5629
5630         /* It is disabled by HW by default */
5631         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5632                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5633                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5634                                 /* 1 - RLC memory Light sleep */
5635                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5636
5637                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5638                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5639                 }
5640
5641                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5642                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5643                 if (adev->flags & AMD_IS_APU)
5644                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5645                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5646                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5647                 else
5648                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5649                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5650                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5651                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5652
5653                 if (temp != data)
5654                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5655
5656                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5657                 gfx_v8_0_wait_for_rlc_serdes(adev);
5658
5659                 /* 5 - clear mgcg override */
5660                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5661
5662                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5663                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5664                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5665                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5666                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5667                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5668                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5669                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5670                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5671                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5672                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5673                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5674                         if (temp != data)
5675                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5676                 }
5677                 udelay(50);
5678
5679                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680                 gfx_v8_0_wait_for_rlc_serdes(adev);
5681         } else {
5682                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5683                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5684                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5685                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5686                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5687                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5688                 if (temp != data)
5689                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5690
5691                 /* 2 - disable MGLS in RLC */
5692                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5693                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5694                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5695                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5696                 }
5697
5698                 /* 3 - disable MGLS in CP */
5699                 data = RREG32(mmCP_MEM_SLP_CNTL);
5700                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5701                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5702                         WREG32(mmCP_MEM_SLP_CNTL, data);
5703                 }
5704
5705                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5706                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5707                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5708                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5709                 if (temp != data)
5710                         WREG32(mmCGTS_SM_CTRL_REG, data);
5711
5712                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713                 gfx_v8_0_wait_for_rlc_serdes(adev);
5714
5715                 /* 6 - set mgcg override */
5716                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5717
5718                 udelay(50);
5719
5720                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5721                 gfx_v8_0_wait_for_rlc_serdes(adev);
5722         }
5723
5724         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5725 }
5726
5727 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5728                                                       bool enable)
5729 {
5730         uint32_t temp, temp1, data, data1;
5731
5732         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5733
5734         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5735
5736         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5737                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5738                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5739                 if (temp1 != data1)
5740                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5741
5742                 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743                 gfx_v8_0_wait_for_rlc_serdes(adev);
5744
5745                 /* 2 - clear cgcg override */
5746                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5747
5748                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749                 gfx_v8_0_wait_for_rlc_serdes(adev);
5750
5751                 /* 3 - write cmd to set CGLS */
5752                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5753
5754                 /* 4 - enable cgcg */
5755                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5756
5757                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5758                         /* enable cgls*/
5759                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5760
5761                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5762                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5763
5764                         if (temp1 != data1)
5765                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5766                 } else {
5767                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5768                 }
5769
5770                 if (temp != data)
5771                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5772
5773                 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5774                  * Cmp_busy/GFX_Idle interrupts
5775                  */
5776                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5777         } else {
5778                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5779                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5780
5781                 /* TEST CGCG */
5782                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5784                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5785                 if (temp1 != data1)
5786                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787
5788                 /* read gfx register to wake up cgcg */
5789                 RREG32(mmCB_CGTT_SCLK_CTRL);
5790                 RREG32(mmCB_CGTT_SCLK_CTRL);
5791                 RREG32(mmCB_CGTT_SCLK_CTRL);
5792                 RREG32(mmCB_CGTT_SCLK_CTRL);
5793
5794                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5795                 gfx_v8_0_wait_for_rlc_serdes(adev);
5796
5797                 /* write cmd to Set CGCG Override */
5798                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5799
5800                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5801                 gfx_v8_0_wait_for_rlc_serdes(adev);
5802
5803                 /* write cmd to Clear CGLS */
5804                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5805
5806                 /* disable cgcg, cgls should be disabled too. */
5807                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5808                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5809                 if (temp != data)
5810                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5811                 /* enable interrupts again for PG */
5812                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5813         }
5814
5815         gfx_v8_0_wait_for_rlc_serdes(adev);
5816
5817         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5818 }
/*
 * Apply the medium and coarse grain clock gating updates in the order the
 * requested direction requires.
 *
 * @adev: amdgpu device
 * @enable: true to enable gating, false to disable it
 *
 * Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
                                            bool enable)
{
        if (!enable) {
                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) */
                gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
                gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
                return 0;
        }

        /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) */
        gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
        gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);

        return 0;
}
5837
5838 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5839                                           enum amd_clockgating_state state)
5840 {
5841         uint32_t msg_id, pp_state = 0;
5842         uint32_t pp_support_state = 0;
5843
5844         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5845                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5846                         pp_support_state = PP_STATE_SUPPORT_LS;
5847                         pp_state = PP_STATE_LS;
5848                 }
5849                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5850                         pp_support_state |= PP_STATE_SUPPORT_CG;
5851                         pp_state |= PP_STATE_CG;
5852                 }
5853                 if (state == AMD_CG_STATE_UNGATE)
5854                         pp_state = 0;
5855
5856                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5857                                 PP_BLOCK_GFX_CG,
5858                                 pp_support_state,
5859                                 pp_state);
5860                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5861         }
5862
5863         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5864                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5865                         pp_support_state = PP_STATE_SUPPORT_LS;
5866                         pp_state = PP_STATE_LS;
5867                 }
5868
5869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5870                         pp_support_state |= PP_STATE_SUPPORT_CG;
5871                         pp_state |= PP_STATE_CG;
5872                 }
5873
5874                 if (state == AMD_CG_STATE_UNGATE)
5875                         pp_state = 0;
5876
5877                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878                                 PP_BLOCK_GFX_MG,
5879                                 pp_support_state,
5880                                 pp_state);
5881                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882         }
5883
5884         return 0;
5885 }
5886
5887 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5888                                           enum amd_clockgating_state state)
5889 {
5890
5891         uint32_t msg_id, pp_state = 0;
5892         uint32_t pp_support_state = 0;
5893
5894         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5895                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5896                         pp_support_state = PP_STATE_SUPPORT_LS;
5897                         pp_state = PP_STATE_LS;
5898                 }
5899                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5900                         pp_support_state |= PP_STATE_SUPPORT_CG;
5901                         pp_state |= PP_STATE_CG;
5902                 }
5903                 if (state == AMD_CG_STATE_UNGATE)
5904                         pp_state = 0;
5905
5906                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5907                                 PP_BLOCK_GFX_CG,
5908                                 pp_support_state,
5909                                 pp_state);
5910                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5911         }
5912
5913         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5914                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5915                         pp_support_state = PP_STATE_SUPPORT_LS;
5916                         pp_state = PP_STATE_LS;
5917                 }
5918                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5919                         pp_support_state |= PP_STATE_SUPPORT_CG;
5920                         pp_state |= PP_STATE_CG;
5921                 }
5922                 if (state == AMD_CG_STATE_UNGATE)
5923                         pp_state = 0;
5924
5925                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5926                                 PP_BLOCK_GFX_3D,
5927                                 pp_support_state,
5928                                 pp_state);
5929                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5930         }
5931
5932         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5933                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5934                         pp_support_state = PP_STATE_SUPPORT_LS;
5935                         pp_state = PP_STATE_LS;
5936                 }
5937
5938                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5939                         pp_support_state |= PP_STATE_SUPPORT_CG;
5940                         pp_state |= PP_STATE_CG;
5941                 }
5942
5943                 if (state == AMD_CG_STATE_UNGATE)
5944                         pp_state = 0;
5945
5946                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947                                 PP_BLOCK_GFX_MG,
5948                                 pp_support_state,
5949                                 pp_state);
5950                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951         }
5952
5953         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5954                 pp_support_state = PP_STATE_SUPPORT_LS;
5955
5956                 if (state == AMD_CG_STATE_UNGATE)
5957                         pp_state = 0;
5958                 else
5959                         pp_state = PP_STATE_LS;
5960
5961                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5962                                 PP_BLOCK_GFX_RLC,
5963                                 pp_support_state,
5964                                 pp_state);
5965                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5966         }
5967
5968         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5969                 pp_support_state = PP_STATE_SUPPORT_LS;
5970
5971                 if (state == AMD_CG_STATE_UNGATE)
5972                         pp_state = 0;
5973                 else
5974                         pp_state = PP_STATE_LS;
5975                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5976                         PP_BLOCK_GFX_CP,
5977                         pp_support_state,
5978                         pp_state);
5979                 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5980         }
5981
5982         return 0;
5983 }
5984
5985 static int gfx_v8_0_set_clockgating_state(void *handle,
5986                                           enum amd_clockgating_state state)
5987 {
5988         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5989
5990         if (amdgpu_sriov_vf(adev))
5991                 return 0;
5992
5993         switch (adev->asic_type) {
5994         case CHIP_FIJI:
5995         case CHIP_CARRIZO:
5996         case CHIP_STONEY:
5997                 gfx_v8_0_update_gfx_clock_gating(adev,
5998                                                  state == AMD_CG_STATE_GATE);
5999                 break;
6000         case CHIP_TONGA:
6001                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6002                 break;
6003         case CHIP_POLARIS10:
6004         case CHIP_POLARIS11:
6005         case CHIP_POLARIS12:
6006         case CHIP_VEGAM:
6007                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6008                 break;
6009         default:
6010                 break;
6011         }
6012         return 0;
6013 }
6014
6015 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6016 {
6017         return *ring->rptr_cpu_addr;
6018 }
6019
6020 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6021 {
6022         struct amdgpu_device *adev = ring->adev;
6023
6024         if (ring->use_doorbell)
6025                 /* XXX check if swapping is necessary on BE */
6026                 return *ring->wptr_cpu_addr;
6027         else
6028                 return RREG32(mmCP_RB0_WPTR);
6029 }
6030
6031 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6032 {
6033         struct amdgpu_device *adev = ring->adev;
6034
6035         if (ring->use_doorbell) {
6036                 /* XXX check if swapping is necessary on BE */
6037                 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6038                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6039         } else {
6040                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6041                 (void)RREG32(mmCP_RB0_WPTR);
6042         }
6043 }
6044
6045 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6046 {
6047         u32 ref_and_mask, reg_mem_engine;
6048
6049         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6050             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6051                 switch (ring->me) {
6052                 case 1:
6053                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6054                         break;
6055                 case 2:
6056                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6057                         break;
6058                 default:
6059                         return;
6060                 }
6061                 reg_mem_engine = 0;
6062         } else {
6063                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6064                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6065         }
6066
6067         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6068         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6069                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6070                                  reg_mem_engine));
6071         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6072         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6073         amdgpu_ring_write(ring, ref_and_mask);
6074         amdgpu_ring_write(ring, ref_and_mask);
6075         amdgpu_ring_write(ring, 0x20); /* poll interval */
6076 }
6077
6078 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6079 {
6080         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6081         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6082                 EVENT_INDEX(4));
6083
6084         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6085         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6086                 EVENT_INDEX(0));
6087 }
6088
6089 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6090                                         struct amdgpu_job *job,
6091                                         struct amdgpu_ib *ib,
6092                                         uint32_t flags)
6093 {
6094         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6095         u32 header, control = 0;
6096
6097         if (ib->flags & AMDGPU_IB_FLAG_CE)
6098                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6099         else
6100                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6101
6102         control |= ib->length_dw | (vmid << 24);
6103
6104         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6105                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6106
6107                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6108                         gfx_v8_0_ring_emit_de_meta(ring);
6109         }
6110
6111         amdgpu_ring_write(ring, header);
6112         amdgpu_ring_write(ring,
6113 #ifdef __BIG_ENDIAN
6114                           (2 << 0) |
6115 #endif
6116                           (ib->gpu_addr & 0xFFFFFFFC));
6117         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6118         amdgpu_ring_write(ring, control);
6119 }
6120
6121 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6122                                           struct amdgpu_job *job,
6123                                           struct amdgpu_ib *ib,
6124                                           uint32_t flags)
6125 {
6126         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6127         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6128
6129         /* Currently, there is a high possibility to get wave ID mismatch
6130          * between ME and GDS, leading to a hw deadlock, because ME generates
6131          * different wave IDs than the GDS expects. This situation happens
6132          * randomly when at least 5 compute pipes use GDS ordered append.
6133          * The wave IDs generated by ME are also wrong after suspend/resume.
6134          * Those are probably bugs somewhere else in the kernel driver.
6135          *
6136          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6137          * GDS to 0 for this ring (me/pipe).
6138          */
6139         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6140                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6141                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6142                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6143         }
6144
6145         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6146         amdgpu_ring_write(ring,
6147 #ifdef __BIG_ENDIAN
6148                                 (2 << 0) |
6149 #endif
6150                                 (ib->gpu_addr & 0xFFFFFFFC));
6151         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6152         amdgpu_ring_write(ring, control);
6153 }
6154
6155 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6156                                          u64 seq, unsigned flags)
6157 {
6158         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6159         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6160         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
6161
6162         /* Workaround for cache flush problems. First send a dummy EOP
6163          * event down the pipe with seq one below.
6164          */
6165         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6166         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6167                                  EOP_TC_ACTION_EN |
6168                                  EOP_TC_WB_ACTION_EN |
6169                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6170                                  EVENT_INDEX(5)));
6171         amdgpu_ring_write(ring, addr & 0xfffffffc);
6172         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6173                                 DATA_SEL(1) | INT_SEL(0));
6174         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6175         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6176
6177         /* Then send the real EOP event down the pipe:
6178          * EVENT_WRITE_EOP - flush caches, send int */
6179         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6180         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6181                                  EOP_TC_ACTION_EN |
6182                                  EOP_TC_WB_ACTION_EN |
6183                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6184                                  EVENT_INDEX(5) |
6185                                  (exec ? EOP_EXEC : 0)));
6186         amdgpu_ring_write(ring, addr & 0xfffffffc);
6187         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6188                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6189         amdgpu_ring_write(ring, lower_32_bits(seq));
6190         amdgpu_ring_write(ring, upper_32_bits(seq));
6191
6192 }
6193
6194 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6195 {
6196         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6197         uint32_t seq = ring->fence_drv.sync_seq;
6198         uint64_t addr = ring->fence_drv.gpu_addr;
6199
6200         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6201         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6202                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6203                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6204         amdgpu_ring_write(ring, addr & 0xfffffffc);
6205         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6206         amdgpu_ring_write(ring, seq);
6207         amdgpu_ring_write(ring, 0xffffffff);
6208         amdgpu_ring_write(ring, 4); /* poll interval */
6209 }
6210
6211 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6212                                         unsigned vmid, uint64_t pd_addr)
6213 {
6214         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6215
6216         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6217
6218         /* wait for the invalidate to complete */
6219         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6220         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6221                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6222                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6223         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6224         amdgpu_ring_write(ring, 0);
6225         amdgpu_ring_write(ring, 0); /* ref */
6226         amdgpu_ring_write(ring, 0); /* mask */
6227         amdgpu_ring_write(ring, 0x20); /* poll interval */
6228
6229         /* compute doesn't have PFP */
6230         if (usepfp) {
6231                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6232                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6233                 amdgpu_ring_write(ring, 0x0);
6234         }
6235 }
6236
6237 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6238 {
6239         return *ring->wptr_cpu_addr;
6240 }
6241
6242 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6243 {
6244         struct amdgpu_device *adev = ring->adev;
6245
6246         /* XXX check if swapping is necessary on BE */
6247         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6248         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6249 }
6250
6251 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6252                                              u64 addr, u64 seq,
6253                                              unsigned flags)
6254 {
6255         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6256         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6257
6258         /* RELEASE_MEM - flush caches, send int */
6259         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6260         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6261                                  EOP_TC_ACTION_EN |
6262                                  EOP_TC_WB_ACTION_EN |
6263                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6264                                  EVENT_INDEX(5)));
6265         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6266         amdgpu_ring_write(ring, addr & 0xfffffffc);
6267         amdgpu_ring_write(ring, upper_32_bits(addr));
6268         amdgpu_ring_write(ring, lower_32_bits(seq));
6269         amdgpu_ring_write(ring, upper_32_bits(seq));
6270 }
6271
6272 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6273                                          u64 seq, unsigned int flags)
6274 {
6275         /* we only allocate 32bit for each seq wb address */
6276         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6277
6278         /* write fence seq to the "addr" */
6279         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6280         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6281                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6282         amdgpu_ring_write(ring, lower_32_bits(addr));
6283         amdgpu_ring_write(ring, upper_32_bits(addr));
6284         amdgpu_ring_write(ring, lower_32_bits(seq));
6285
6286         if (flags & AMDGPU_FENCE_FLAG_INT) {
6287                 /* set register to trigger INT */
6288                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6289                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6290                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6291                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6292                 amdgpu_ring_write(ring, 0);
6293                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6294         }
6295 }
6296
6297 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6298 {
6299         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6300         amdgpu_ring_write(ring, 0);
6301 }
6302
6303 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6304 {
6305         uint32_t dw2 = 0;
6306
6307         if (amdgpu_sriov_vf(ring->adev))
6308                 gfx_v8_0_ring_emit_ce_meta(ring);
6309
6310         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6311         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6312                 gfx_v8_0_ring_emit_vgt_flush(ring);
6313                 /* set load_global_config & load_global_uconfig */
6314                 dw2 |= 0x8001;
6315                 /* set load_cs_sh_regs */
6316                 dw2 |= 0x01000000;
6317                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6318                 dw2 |= 0x10002;
6319
6320                 /* set load_ce_ram if preamble presented */
6321                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6322                         dw2 |= 0x10000000;
6323         } else {
6324                 /* still load_ce_ram if this is the first time preamble presented
6325                  * although there is no context switch happens.
6326                  */
6327                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6328                         dw2 |= 0x10000000;
6329         }
6330
6331         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6332         amdgpu_ring_write(ring, dw2);
6333         amdgpu_ring_write(ring, 0);
6334 }
6335
6336 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6337                                                   uint64_t addr)
6338 {
6339         unsigned ret;
6340
6341         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6342         amdgpu_ring_write(ring, lower_32_bits(addr));
6343         amdgpu_ring_write(ring, upper_32_bits(addr));
6344         /* discard following DWs if *cond_exec_gpu_addr==0 */
6345         amdgpu_ring_write(ring, 0);
6346         ret = ring->wptr & ring->buf_mask;
6347         /* patch dummy value later */
6348         amdgpu_ring_write(ring, 0);
6349         return ret;
6350 }
6351
6352 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6353                                     uint32_t reg_val_offs)
6354 {
6355         struct amdgpu_device *adev = ring->adev;
6356
6357         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6358         amdgpu_ring_write(ring, 0 |     /* src: register*/
6359                                 (5 << 8) |      /* dst: memory */
6360                                 (1 << 20));     /* write confirm */
6361         amdgpu_ring_write(ring, reg);
6362         amdgpu_ring_write(ring, 0);
6363         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6364                                 reg_val_offs * 4));
6365         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6366                                 reg_val_offs * 4));
6367 }
6368
6369 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6370                                   uint32_t val)
6371 {
6372         uint32_t cmd;
6373
6374         switch (ring->funcs->type) {
6375         case AMDGPU_RING_TYPE_GFX:
6376                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6377                 break;
6378         case AMDGPU_RING_TYPE_KIQ:
6379                 cmd = 1 << 16; /* no inc addr */
6380                 break;
6381         default:
6382                 cmd = WR_CONFIRM;
6383                 break;
6384         }
6385
6386         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6387         amdgpu_ring_write(ring, cmd);
6388         amdgpu_ring_write(ring, reg);
6389         amdgpu_ring_write(ring, 0);
6390         amdgpu_ring_write(ring, val);
6391 }
6392
6393 static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
6394                                   int mem_space, int opt, uint32_t addr0,
6395                                   uint32_t addr1, uint32_t ref, uint32_t mask,
6396                                   uint32_t inv)
6397 {
6398         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6399         amdgpu_ring_write(ring,
6400                           /* memory (1) or register (0) */
6401                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
6402                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
6403                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
6404                            WAIT_REG_MEM_ENGINE(eng_sel)));
6405
6406         if (mem_space)
6407                 BUG_ON(addr0 & 0x3); /* Dword align */
6408         amdgpu_ring_write(ring, addr0);
6409         amdgpu_ring_write(ring, addr1);
6410         amdgpu_ring_write(ring, ref);
6411         amdgpu_ring_write(ring, mask);
6412         amdgpu_ring_write(ring, inv); /* poll interval */
6413 }
6414
6415 static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6416                                         uint32_t val, uint32_t mask)
6417 {
6418         gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6419 }
6420
6421 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6422 {
6423         struct amdgpu_device *adev = ring->adev;
6424         uint32_t value = 0;
6425
6426         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6427         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6428         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6429         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6430         WREG32(mmSQ_CMD, value);
6431 }
6432
6433 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6434                                                  enum amdgpu_interrupt_state state)
6435 {
6436         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6437                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6438 }
6439
6440 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6441                                                      int me, int pipe,
6442                                                      enum amdgpu_interrupt_state state)
6443 {
6444         u32 mec_int_cntl, mec_int_cntl_reg;
6445
6446         /*
6447          * amdgpu controls only the first MEC. That's why this function only
6448          * handles the setting of interrupts for this specific MEC. All other
6449          * pipes' interrupts are set by amdkfd.
6450          */
6451
6452         if (me == 1) {
6453                 switch (pipe) {
6454                 case 0:
6455                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6456                         break;
6457                 case 1:
6458                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6459                         break;
6460                 case 2:
6461                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6462                         break;
6463                 case 3:
6464                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6465                         break;
6466                 default:
6467                         DRM_DEBUG("invalid pipe %d\n", pipe);
6468                         return;
6469                 }
6470         } else {
6471                 DRM_DEBUG("invalid me %d\n", me);
6472                 return;
6473         }
6474
6475         switch (state) {
6476         case AMDGPU_IRQ_STATE_DISABLE:
6477                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6478                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6479                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6480                 break;
6481         case AMDGPU_IRQ_STATE_ENABLE:
6482                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6483                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6484                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6485                 break;
6486         default:
6487                 break;
6488         }
6489 }
6490
6491 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6492                                              struct amdgpu_irq_src *source,
6493                                              unsigned type,
6494                                              enum amdgpu_interrupt_state state)
6495 {
6496         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6497                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6498
6499         return 0;
6500 }
6501
6502 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6503                                               struct amdgpu_irq_src *source,
6504                                               unsigned type,
6505                                               enum amdgpu_interrupt_state state)
6506 {
6507         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6508                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6509
6510         return 0;
6511 }
6512
6513 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6514                                             struct amdgpu_irq_src *src,
6515                                             unsigned type,
6516                                             enum amdgpu_interrupt_state state)
6517 {
6518         switch (type) {
6519         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6520                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6521                 break;
6522         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6523                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6524                 break;
6525         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6526                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6527                 break;
6528         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6529                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6530                 break;
6531         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6532                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6533                 break;
6534         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6535                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6536                 break;
6537         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6538                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6539                 break;
6540         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6541                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6542                 break;
6543         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6544                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6545                 break;
6546         default:
6547                 break;
6548         }
6549         return 0;
6550 }
6551
6552 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6553                                          struct amdgpu_irq_src *source,
6554                                          unsigned int type,
6555                                          enum amdgpu_interrupt_state state)
6556 {
6557         int enable_flag;
6558
6559         switch (state) {
6560         case AMDGPU_IRQ_STATE_DISABLE:
6561                 enable_flag = 0;
6562                 break;
6563
6564         case AMDGPU_IRQ_STATE_ENABLE:
6565                 enable_flag = 1;
6566                 break;
6567
6568         default:
6569                 return -EINVAL;
6570         }
6571
6572         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6573         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6574         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6575         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6578                      enable_flag);
6579         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580                      enable_flag);
6581         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582                      enable_flag);
6583         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584                      enable_flag);
6585         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586                      enable_flag);
6587         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588                      enable_flag);
6589         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590                      enable_flag);
6591         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592                      enable_flag);
6593
6594         return 0;
6595 }
6596
6597 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6598                                      struct amdgpu_irq_src *source,
6599                                      unsigned int type,
6600                                      enum amdgpu_interrupt_state state)
6601 {
6602         int enable_flag;
6603
6604         switch (state) {
6605         case AMDGPU_IRQ_STATE_DISABLE:
6606                 enable_flag = 1;
6607                 break;
6608
6609         case AMDGPU_IRQ_STATE_ENABLE:
6610                 enable_flag = 0;
6611                 break;
6612
6613         default:
6614                 return -EINVAL;
6615         }
6616
6617         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6618                      enable_flag);
6619
6620         return 0;
6621 }
6622
6623 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6624                             struct amdgpu_irq_src *source,
6625                             struct amdgpu_iv_entry *entry)
6626 {
6627         int i;
6628         u8 me_id, pipe_id, queue_id;
6629         struct amdgpu_ring *ring;
6630
6631         DRM_DEBUG("IH: CP EOP\n");
6632         me_id = (entry->ring_id & 0x0c) >> 2;
6633         pipe_id = (entry->ring_id & 0x03) >> 0;
6634         queue_id = (entry->ring_id & 0x70) >> 4;
6635
6636         switch (me_id) {
6637         case 0:
6638                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6639                 break;
6640         case 1:
6641         case 2:
6642                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6643                         ring = &adev->gfx.compute_ring[i];
6644                         /* Per-queue interrupt is supported for MEC starting from VI.
6645                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6646                           */
6647                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6648                                 amdgpu_fence_process(ring);
6649                 }
6650                 break;
6651         }
6652         return 0;
6653 }
6654
6655 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6656                            struct amdgpu_iv_entry *entry)
6657 {
6658         u8 me_id, pipe_id, queue_id;
6659         struct amdgpu_ring *ring;
6660         int i;
6661
6662         me_id = (entry->ring_id & 0x0c) >> 2;
6663         pipe_id = (entry->ring_id & 0x03) >> 0;
6664         queue_id = (entry->ring_id & 0x70) >> 4;
6665
6666         switch (me_id) {
6667         case 0:
6668                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6669                 break;
6670         case 1:
6671         case 2:
6672                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6673                         ring = &adev->gfx.compute_ring[i];
6674                         if (ring->me == me_id && ring->pipe == pipe_id &&
6675                             ring->queue == queue_id)
6676                                 drm_sched_fault(&ring->sched);
6677                 }
6678                 break;
6679         }
6680 }
6681
/*
 * Privileged register fault interrupt handler: log the illegal register
 * access and hand the entry to gfx_v8_0_fault().  @source is not used.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6690
/*
 * Privileged instruction fault interrupt handler: log the illegal
 * instruction and hand the entry to gfx_v8_0_fault().  @source is not used.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6699
/*
 * CP EDC/ECC error interrupt handler: log the event.  None of the arguments
 * are inspected.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* terminate the message so it is not merged with the next log line */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6707
6708 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6709                                   bool from_wq)
6710 {
6711         u32 enc, se_id, sh_id, cu_id;
6712         char type[20];
6713         int sq_edc_source = -1;
6714
6715         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6716         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6717
6718         switch (enc) {
6719                 case 0:
6720                         DRM_INFO("SQ general purpose intr detected:"
6721                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6722                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6723                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6724                                         "wlt %d, thread_trace %d.\n",
6725                                         se_id,
6726                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6727                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6728                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6729                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6730                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6731                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6732                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6733                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6734                                         );
6735                         break;
6736                 case 1:
6737                 case 2:
6738
6739                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6740                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6741
6742                         /*
6743                          * This function can be called either directly from ISR
6744                          * or from BH in which case we can access SQ_EDC_INFO
6745                          * instance
6746                          */
6747                         if (from_wq) {
6748                                 mutex_lock(&adev->grbm_idx_mutex);
6749                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
6750
6751                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6752
6753                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6754                                 mutex_unlock(&adev->grbm_idx_mutex);
6755                         }
6756
6757                         if (enc == 1)
6758                                 sprintf(type, "instruction intr");
6759                         else
6760                                 sprintf(type, "EDC/ECC error");
6761
6762                         DRM_INFO(
6763                                 "SQ %s detected: "
6764                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6765                                         "trap %s, sq_ed_info.source %s.\n",
6766                                         type, se_id, sh_id, cu_id,
6767                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6768                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6769                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6770                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6771                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6772                                 );
6773                         break;
6774                 default:
6775                         DRM_ERROR("SQ invalid encoding type\n.");
6776         }
6777 }
6778
6779 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6780 {
6781
6782         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6783         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6784
6785         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6786 }
6787
6788 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6789                            struct amdgpu_irq_src *source,
6790                            struct amdgpu_iv_entry *entry)
6791 {
6792         unsigned ih_data = entry->src_data[0];
6793
6794         /*
6795          * Try to submit work so SQ_EDC_INFO can be accessed from
6796          * BH. If previous work submission hasn't finished yet
6797          * just print whatever info is possible directly from the ISR.
6798          */
6799         if (work_pending(&adev->gfx.sq_work.work)) {
6800                 gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6801         } else {
6802                 adev->gfx.sq_work.ih_data = ih_data;
6803                 schedule_work(&adev->gfx.sq_work.work);
6804         }
6805
6806         return 0;
6807 }
6808
6809 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6810 {
6811         amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6812         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6813                           PACKET3_TC_ACTION_ENA |
6814                           PACKET3_SH_KCACHE_ACTION_ENA |
6815                           PACKET3_SH_ICACHE_ACTION_ENA |
6816                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6817         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6818         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6819         amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6820 }
6821
6822 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6823 {
6824         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6825         amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6826                           PACKET3_TC_ACTION_ENA |
6827                           PACKET3_SH_KCACHE_ACTION_ENA |
6828                           PACKET3_SH_ICACHE_ACTION_ENA |
6829                           PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6830         amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6831         amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
6832         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
6833         amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
6834         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6835 }
6836
6837
6838 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6839 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT       0x0000007f
6840 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6841                                         uint32_t pipe, bool enable)
6842 {
6843         uint32_t val;
6844         uint32_t wcl_cs_reg;
6845
6846         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6847
6848         switch (pipe) {
6849         case 0:
6850                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6851                 break;
6852         case 1:
6853                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6854                 break;
6855         case 2:
6856                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6857                 break;
6858         case 3:
6859                 wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6860                 break;
6861         default:
6862                 DRM_DEBUG("invalid pipe %d\n", pipe);
6863                 return;
6864         }
6865
6866         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6867
6868 }
6869
6870 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT      0x07ffffff
6871 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6872 {
6873         struct amdgpu_device *adev = ring->adev;
6874         uint32_t val;
6875         int i;
6876
6877         /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6878          * number of gfx waves. Setting 5 bit will make sure gfx only gets
6879          * around 25% of gpu resources.
6880          */
6881         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6882         amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6883
6884         /* Restrict waves for normal/low priority compute queues as well
6885          * to get best QoS for high priority compute jobs.
6886          *
6887          * amdgpu controls only 1st ME(0-3 CS pipes).
6888          */
6889         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6890                 if (i != ring->pipe)
6891                         gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6892
6893         }
6894
6895 }
6896
/*
 * Reset a kernel gfx queue by requesting a per-VMID reset through the KIQ.
 *
 * A write of mmCP_VMID_RESET (RESET_REQUEST = 1 << @vmid) is submitted on
 * KIQ ring 0 and the KIQ ring is tested.  Then, on @ring itself, a fence,
 * a wait for mmCP_VMID_RESET to read back as 0 under mask 0xffff and a
 * write of 0 to mmCP_VMID_RESET are emitted, and @ring is tested.
 *
 * Returns -EINVAL under SR-IOV or when the KIQ has no kiq_unmap_queues
 * callback, -ENOMEM when ring space cannot be allocated, otherwise the
 * result of the ring tests.
 */
static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        unsigned long flags;
        u32 tmp;
        int r;

        if (amdgpu_sriov_vf(adev))
                return -EINVAL;

        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;

        /* KIQ ring submission is serialised by ring_lock */
        spin_lock_irqsave(&kiq->ring_lock, flags);

        /* 5 dwords: the WRITE_DATA packet emitted by gfx_v8_0_ring_emit_wreg() */
        if (amdgpu_ring_alloc(kiq_ring, 5)) {
                spin_unlock_irqrestore(&kiq->ring_lock, flags);
                return -ENOMEM;
        }

        tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
        gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp);
        amdgpu_ring_commit(kiq_ring);

        spin_unlock_irqrestore(&kiq->ring_lock, flags);

        r = amdgpu_ring_test_ring(kiq_ring);
        if (r)
                return r;

        /*
         * presumably 7 dwords for the register wait, 12 for the fence and
         * 5 for the register write - TODO confirm against the emitters
         */
        if (amdgpu_ring_alloc(ring, 7 + 12 + 5))
                return -ENOMEM;
        gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr,
                                     ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
        /* wait until the reset request bits read back as zero, then clear */
        gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff);
        gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0);

        return amdgpu_ring_test_ring(ring);
}
6938
6939 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6940         .name = "gfx_v8_0",
6941         .early_init = gfx_v8_0_early_init,
6942         .late_init = gfx_v8_0_late_init,
6943         .sw_init = gfx_v8_0_sw_init,
6944         .sw_fini = gfx_v8_0_sw_fini,
6945         .hw_init = gfx_v8_0_hw_init,
6946         .hw_fini = gfx_v8_0_hw_fini,
6947         .suspend = gfx_v8_0_suspend,
6948         .resume = gfx_v8_0_resume,
6949         .is_idle = gfx_v8_0_is_idle,
6950         .wait_for_idle = gfx_v8_0_wait_for_idle,
6951         .check_soft_reset = gfx_v8_0_check_soft_reset,
6952         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6953         .soft_reset = gfx_v8_0_soft_reset,
6954         .post_soft_reset = gfx_v8_0_post_soft_reset,
6955         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6956         .set_powergating_state = gfx_v8_0_set_powergating_state,
6957         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6958 };
6959
6960 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6961         .type = AMDGPU_RING_TYPE_GFX,
6962         .align_mask = 0xff,
6963         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6964         .support_64bit_ptrs = false,
6965         .get_rptr = gfx_v8_0_ring_get_rptr,
6966         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6967         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6968         .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6969                 5 +  /* COND_EXEC */
6970                 7 +  /* PIPELINE_SYNC */
6971                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6972                 12 +  /* FENCE for VM_FLUSH */
6973                 20 + /* GDS switch */
6974                 4 + /* double SWITCH_BUFFER,
6975                        the first COND_EXEC jump to the place just
6976                            prior to this double SWITCH_BUFFER  */
6977                 5 + /* COND_EXEC */
6978                 7 +      /*     HDP_flush */
6979                 4 +      /*     VGT_flush */
6980                 14 + /* CE_META */
6981                 31 + /* DE_META */
6982                 3 + /* CNTX_CTRL */
6983                 5 + /* HDP_INVL */
6984                 12 + 12 + /* FENCE x2 */
6985                 2 + /* SWITCH_BUFFER */
6986                 5, /* SURFACE_SYNC */
6987         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6988         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6989         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6990         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6991         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6992         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6993         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6994         .test_ring = gfx_v8_0_ring_test_ring,
6995         .test_ib = gfx_v8_0_ring_test_ib,
6996         .insert_nop = amdgpu_ring_insert_nop,
6997         .pad_ib = amdgpu_ring_generic_pad_ib,
6998         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6999         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7000         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7001         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7002         .soft_recovery = gfx_v8_0_ring_soft_recovery,
7003         .emit_mem_sync = gfx_v8_0_emit_mem_sync,
7004         .reset = gfx_v8_0_reset_kgq,
7005 };
7006
7007 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7008         .type = AMDGPU_RING_TYPE_COMPUTE,
7009         .align_mask = 0xff,
7010         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7011         .support_64bit_ptrs = false,
7012         .get_rptr = gfx_v8_0_ring_get_rptr,
7013         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7014         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7015         .emit_frame_size =
7016                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7017                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7018                 5 + /* hdp_invalidate */
7019                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7020                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7021                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7022                 7 + /* gfx_v8_0_emit_mem_sync_compute */
7023                 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7024                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7025         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7026         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7027         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7028         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7029         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7030         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7031         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7032         .test_ring = gfx_v8_0_ring_test_ring,
7033         .test_ib = gfx_v8_0_ring_test_ib,
7034         .insert_nop = amdgpu_ring_insert_nop,
7035         .pad_ib = amdgpu_ring_generic_pad_ib,
7036         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7037         .soft_recovery = gfx_v8_0_ring_soft_recovery,
7038         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7039         .emit_wave_limit = gfx_v8_0_emit_wave_limit,
7040 };
7041
7042 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7043         .type = AMDGPU_RING_TYPE_KIQ,
7044         .align_mask = 0xff,
7045         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7046         .support_64bit_ptrs = false,
7047         .get_rptr = gfx_v8_0_ring_get_rptr,
7048         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7049         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7050         .emit_frame_size =
7051                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7052                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7053                 5 + /* hdp_invalidate */
7054                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7055                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7056                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7057         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7058         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7059         .test_ring = gfx_v8_0_ring_test_ring,
7060         .insert_nop = amdgpu_ring_insert_nop,
7061         .pad_ib = amdgpu_ring_generic_pad_ib,
7062         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7063         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7064 };
7065
7066 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7067 {
7068         int i;
7069
7070         adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7071
7072         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7073                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7074
7075         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7076                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7077 }
7078
7079 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7080         .set = gfx_v8_0_set_eop_interrupt_state,
7081         .process = gfx_v8_0_eop_irq,
7082 };
7083
7084 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7085         .set = gfx_v8_0_set_priv_reg_fault_state,
7086         .process = gfx_v8_0_priv_reg_irq,
7087 };
7088
7089 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7090         .set = gfx_v8_0_set_priv_inst_fault_state,
7091         .process = gfx_v8_0_priv_inst_irq,
7092 };
7093
7094 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7095         .set = gfx_v8_0_set_cp_ecc_int_state,
7096         .process = gfx_v8_0_cp_ecc_error_irq,
7097 };
7098
7099 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7100         .set = gfx_v8_0_set_sq_int_state,
7101         .process = gfx_v8_0_sq_irq,
7102 };
7103
7104 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7105 {
7106         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7107         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7108
7109         adev->gfx.priv_reg_irq.num_types = 1;
7110         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7111
7112         adev->gfx.priv_inst_irq.num_types = 1;
7113         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7114
7115         adev->gfx.cp_ecc_error_irq.num_types = 1;
7116         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7117
7118         adev->gfx.sq_irq.num_types = 1;
7119         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7120 }
7121
7122 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7123 {
7124         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7125 }
7126
7127 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7128 {
7129         /* init asci gds info */
7130         adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7131         adev->gds.gws_size = 64;
7132         adev->gds.oa_size = 16;
7133         adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7134 }
7135
7136 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7137                                                  u32 bitmap)
7138 {
7139         u32 data;
7140
7141         if (!bitmap)
7142                 return;
7143
7144         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7145         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7146
7147         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7148 }
7149
7150 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7151 {
7152         u32 data, mask;
7153
7154         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7155                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7156
7157         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7158
7159         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7160 }
7161
7162 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7163 {
7164         int i, j, k, counter, active_cu_number = 0;
7165         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7166         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7167         unsigned disable_masks[4 * 2];
7168         u32 ao_cu_num;
7169
7170         memset(cu_info, 0, sizeof(*cu_info));
7171
7172         if (adev->flags & AMD_IS_APU)
7173                 ao_cu_num = 2;
7174         else
7175                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7176
7177         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7178
7179         mutex_lock(&adev->grbm_idx_mutex);
7180         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7181                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7182                         mask = 1;
7183                         ao_bitmap = 0;
7184                         counter = 0;
7185                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7186                         if (i < 4 && j < 2)
7187                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7188                                         adev, disable_masks[i * 2 + j]);
7189                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7190                         cu_info->bitmap[0][i][j] = bitmap;
7191
7192                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7193                                 if (bitmap & mask) {
7194                                         if (counter < ao_cu_num)
7195                                                 ao_bitmap |= mask;
7196                                         counter ++;
7197                                 }
7198                                 mask <<= 1;
7199                         }
7200                         active_cu_number += counter;
7201                         if (i < 2 && j < 2)
7202                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7203                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7204                 }
7205         }
7206         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7207         mutex_unlock(&adev->grbm_idx_mutex);
7208
7209         cu_info->number = active_cu_number;
7210         cu_info->ao_cu_mask = ao_cu_mask;
7211         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7212         cu_info->max_waves_per_simd = 10;
7213         cu_info->max_scratch_slots_per_cu = 32;
7214         cu_info->wave_front_size = 64;
7215         cu_info->lds_size = 64;
7216 }
7217
7218 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7219 {
7220         .type = AMD_IP_BLOCK_TYPE_GFX,
7221         .major = 8,
7222         .minor = 0,
7223         .rev = 0,
7224         .funcs = &gfx_v8_0_ip_funcs,
7225 };
7226
7227 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7228 {
7229         .type = AMD_IP_BLOCK_TYPE_GFX,
7230         .major = 8,
7231         .minor = 1,
7232         .rev = 0,
7233         .funcs = &gfx_v8_0_ip_funcs,
7234 };
7235
7236 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7237 {
7238         uint64_t ce_payload_addr;
7239         int cnt_ce;
7240         union {
7241                 struct vi_ce_ib_state regular;
7242                 struct vi_ce_ib_state_chained_ib chained;
7243         } ce_payload = {};
7244
7245         if (ring->adev->virt.chained_ib_support) {
7246                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7247                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7248                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7249         } else {
7250                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7251                         offsetof(struct vi_gfx_meta_data, ce_payload);
7252                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7253         }
7254
7255         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7256         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7257                                 WRITE_DATA_DST_SEL(8) |
7258                                 WR_CONFIRM) |
7259                                 WRITE_DATA_CACHE_POLICY(0));
7260         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7261         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7262         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7263 }
7264
7265 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7266 {
7267         uint64_t de_payload_addr, gds_addr, csa_addr;
7268         int cnt_de;
7269         union {
7270                 struct vi_de_ib_state regular;
7271                 struct vi_de_ib_state_chained_ib chained;
7272         } de_payload = {};
7273
7274         csa_addr = amdgpu_csa_vaddr(ring->adev);
7275         gds_addr = csa_addr + 4096;
7276         if (ring->adev->virt.chained_ib_support) {
7277                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7278                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7279                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7280                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7281         } else {
7282                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7283                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7284                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7285                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7286         }
7287
7288         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7289         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7290                                 WRITE_DATA_DST_SEL(8) |
7291                                 WR_CONFIRM) |
7292                                 WRITE_DATA_CACHE_POLICY(0));
7293         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7294         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7295         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7296 }
This page took 0.465693 seconds and 4 git commands to generate.