/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"	/* NOTE(review): duplicate include (guarded, harmless) */

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

/* Ring/queue sizing for the gfx v8 block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

/* Per-ASIC golden GB_ADDR_CONFIG values (written to mmGB_ADDR_CONFIG). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field-building helpers for GB_TILE_MODEn / GB_MACROTILE_MODEn register
 * values: each shifts its argument into the named bitfield position.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-client override bits in mmRLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
122
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
129
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
153
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
165
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
172
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
174 {
175         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 };
192
193 static const u32 golden_settings_tonga_a11[] =
194 {
195         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198         mmGB_GPU_ID, 0x0000000f, 0x00000000,
199         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 };
212
213 static const u32 tonga_golden_common_all[] =
214 {
215         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 };
224
225 static const u32 tonga_mgcg_cgcg_init[] =
226 {
227         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
258         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 };
303
304 static const u32 golden_settings_vegam_a11[] =
305 {
306         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316         mmSQ_CONFIG, 0x07f80000, 0x01180000,
317         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 };
324
325 static const u32 vegam_golden_common_all[] =
326 {
327         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 };
334
335 static const u32 golden_settings_polaris11_a11[] =
336 {
337         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347         mmSQ_CONFIG, 0x07f80000, 0x01180000,
348         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 };
355
356 static const u32 polaris11_golden_common_all[] =
357 {
358         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 };
365
366 static const u32 golden_settings_polaris10_a11[] =
367 {
368         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379         mmSQ_CONFIG, 0x07f80000, 0x07180000,
380         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 };
386
387 static const u32 polaris10_golden_common_all[] =
388 {
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 };
398
399 static const u32 fiji_golden_common_all[] =
400 {
401         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 };
412
413 static const u32 golden_settings_fiji_a10[] =
414 {
415         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 };
427
428 static const u32 fiji_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 };
466
467 static const u32 golden_settings_iceland_a11[] =
468 {
469         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472         mmGB_GPU_ID, 0x0000000f, 0x00000000,
473         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 };
486
487 static const u32 iceland_golden_common_all[] =
488 {
489         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 };
498
499 static const u32 iceland_mgcg_cgcg_init[] =
500 {
501         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 };
566
567 static const u32 cz_golden_settings_a11[] =
568 {
569         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571         mmGB_GPU_ID, 0x0000000f, 0x00000000,
572         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 };
582
583 static const u32 cz_golden_common_all[] =
584 {
585         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 };
594
595 static const u32 cz_mgcg_cgcg_init[] =
596 {
597         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 };
673
/*
 * Stoney golden register settings: { register offset, mask, value } triplets
 * applied at init time via amdgpu_device_program_register_sequence() in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
687
/*
 * Stoney common golden registers ({ register, mask, value } triplets):
 * GRBM broadcast index, raster config and SPI CU resource reservations.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
699
/*
 * Stoney medium/coarse grain clockgating init values
 * ({ register, mask, value } triplets).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
708
709
/*
 * Human-readable descriptions of SQ EDC error sources, used when reporting
 * ECC/EDC interrupts.  NOTE(review): entry order suggests the array is
 * indexed directly by the SQ_EDC_INFO SOURCE field value — confirm at the
 * use site.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
719
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
728
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" registers
 *
 * Applies the hardware-validated register sequences (clockgating init,
 * golden settings and common tables declared earlier in this file) for the
 * detected VI ASIC variant.  ASIC types without a table are left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk: on three particular Polaris10 boards
		 * (matched by PCI revision plus subsystem vendor/device IDs)
		 * two extra I2C register writes are issued during init.
		 * NOTE(review): presumably programming an external component
		 * on those boards — confirm against the board documentation.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
825
/*
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 *
 * Eight scratch registers starting at mmSCRATCH_REG0; free_mask has one
 * bit per register and starts with all of them free.
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
832
833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
834 {
835         struct amdgpu_device *adev = ring->adev;
836         uint32_t scratch;
837         uint32_t tmp = 0;
838         unsigned i;
839         int r;
840
841         r = amdgpu_gfx_scratch_get(adev, &scratch);
842         if (r)
843                 return r;
844
845         WREG32(scratch, 0xCAFEDEAD);
846         r = amdgpu_ring_alloc(ring, 3);
847         if (r)
848                 goto error_free_scratch;
849
850         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
851         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
852         amdgpu_ring_write(ring, 0xDEADBEEF);
853         amdgpu_ring_commit(ring);
854
855         for (i = 0; i < adev->usec_timeout; i++) {
856                 tmp = RREG32(scratch);
857                 if (tmp == 0xDEADBEEF)
858                         break;
859                 DRM_UDELAY(1);
860         }
861
862         if (i >= adev->usec_timeout)
863                 r = -ETIMEDOUT;
864
865 error_free_scratch:
866         amdgpu_gfx_scratch_free(adev, scratch);
867         return r;
868 }
869
870 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
871 {
872         struct amdgpu_device *adev = ring->adev;
873         struct amdgpu_ib ib;
874         struct dma_fence *f = NULL;
875
876         unsigned int index;
877         uint64_t gpu_addr;
878         uint32_t tmp;
879         long r;
880
881         r = amdgpu_device_wb_get(adev, &index);
882         if (r)
883                 return r;
884
885         gpu_addr = adev->wb.gpu_addr + (index * 4);
886         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
887         memset(&ib, 0, sizeof(ib));
888         r = amdgpu_ib_get(adev, NULL, 16, &ib);
889         if (r)
890                 goto err1;
891
892         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
893         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
894         ib.ptr[2] = lower_32_bits(gpu_addr);
895         ib.ptr[3] = upper_32_bits(gpu_addr);
896         ib.ptr[4] = 0xDEADBEEF;
897         ib.length_dw = 5;
898
899         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
900         if (r)
901                 goto err2;
902
903         r = dma_fence_wait_timeout(f, false, timeout);
904         if (r == 0) {
905                 r = -ETIMEDOUT;
906                 goto err2;
907         } else if (r < 0) {
908                 goto err2;
909         }
910
911         tmp = adev->wb.wb[index];
912         if (tmp == 0xDEADBEEF)
913                 r = 0;
914         else
915                 r = -EINVAL;
916
917 err2:
918         amdgpu_ib_free(adev, &ib, NULL);
919         dma_fence_put(f);
920 err1:
921         amdgpu_device_wb_free(adev, index);
922         return r;
923 }
924
925
926 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
927 {
928         release_firmware(adev->gfx.pfp_fw);
929         adev->gfx.pfp_fw = NULL;
930         release_firmware(adev->gfx.me_fw);
931         adev->gfx.me_fw = NULL;
932         release_firmware(adev->gfx.ce_fw);
933         adev->gfx.ce_fw = NULL;
934         release_firmware(adev->gfx.rlc_fw);
935         adev->gfx.rlc_fw = NULL;
936         release_firmware(adev->gfx.mec_fw);
937         adev->gfx.mec_fw = NULL;
938         if ((adev->asic_type != CHIP_STONEY) &&
939             (adev->asic_type != CHIP_TOPAZ))
940                 release_firmware(adev->gfx.mec2_fw);
941         adev->gfx.mec2_fw = NULL;
942
943         kfree(adev->gfx.rlc.register_list_format);
944 }
945
946 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
947 {
948         const char *chip_name;
949         char fw_name[30];
950         int err;
951         struct amdgpu_firmware_info *info = NULL;
952         const struct common_firmware_header *header = NULL;
953         const struct gfx_firmware_header_v1_0 *cp_hdr;
954         const struct rlc_firmware_header_v2_0 *rlc_hdr;
955         unsigned int *tmp = NULL, i;
956
957         DRM_DEBUG("\n");
958
959         switch (adev->asic_type) {
960         case CHIP_TOPAZ:
961                 chip_name = "topaz";
962                 break;
963         case CHIP_TONGA:
964                 chip_name = "tonga";
965                 break;
966         case CHIP_CARRIZO:
967                 chip_name = "carrizo";
968                 break;
969         case CHIP_FIJI:
970                 chip_name = "fiji";
971                 break;
972         case CHIP_STONEY:
973                 chip_name = "stoney";
974                 break;
975         case CHIP_POLARIS10:
976                 chip_name = "polaris10";
977                 break;
978         case CHIP_POLARIS11:
979                 chip_name = "polaris11";
980                 break;
981         case CHIP_POLARIS12:
982                 chip_name = "polaris12";
983                 break;
984         case CHIP_VEGAM:
985                 chip_name = "vegam";
986                 break;
987         default:
988                 BUG();
989         }
990
991         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
992                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
993                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
994                 if (err == -ENOENT) {
995                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
996                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
997                 }
998         } else {
999                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001         }
1002         if (err)
1003                 goto out;
1004         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1005         if (err)
1006                 goto out;
1007         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1008         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1009         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1010
1011         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1012                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1013                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1014                 if (err == -ENOENT) {
1015                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1016                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1017                 }
1018         } else {
1019                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021         }
1022         if (err)
1023                 goto out;
1024         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1025         if (err)
1026                 goto out;
1027         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1028         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1029
1030         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1031
1032         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1033                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1034                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1035                 if (err == -ENOENT) {
1036                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1037                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1038                 }
1039         } else {
1040                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042         }
1043         if (err)
1044                 goto out;
1045         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1046         if (err)
1047                 goto out;
1048         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1049         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1050         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1051
1052         /*
1053          * Support for MCBP/Virtualization in combination with chained IBs is
1054          * formal released on feature version #46
1055          */
1056         if (adev->gfx.ce_feature_version >= 46 &&
1057             adev->gfx.pfp_feature_version >= 46) {
1058                 adev->virt.chained_ib_support = true;
1059                 DRM_INFO("Chained IB support enabled!\n");
1060         } else
1061                 adev->virt.chained_ib_support = false;
1062
1063         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1064         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1065         if (err)
1066                 goto out;
1067         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1068         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1069         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1070         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1071
1072         adev->gfx.rlc.save_and_restore_offset =
1073                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1074         adev->gfx.rlc.clear_state_descriptor_offset =
1075                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1076         adev->gfx.rlc.avail_scratch_ram_locations =
1077                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1078         adev->gfx.rlc.reg_restore_list_size =
1079                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1080         adev->gfx.rlc.reg_list_format_start =
1081                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1082         adev->gfx.rlc.reg_list_format_separate_start =
1083                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1084         adev->gfx.rlc.starting_offsets_start =
1085                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1086         adev->gfx.rlc.reg_list_format_size_bytes =
1087                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1088         adev->gfx.rlc.reg_list_size_bytes =
1089                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1090
1091         adev->gfx.rlc.register_list_format =
1092                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1093                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1094
1095         if (!adev->gfx.rlc.register_list_format) {
1096                 err = -ENOMEM;
1097                 goto out;
1098         }
1099
1100         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1101                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1102         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1103                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1104
1105         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1106
1107         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1108                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1109         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1110                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1111
1112         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1113                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1114                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1115                 if (err == -ENOENT) {
1116                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1117                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1118                 }
1119         } else {
1120                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122         }
1123         if (err)
1124                 goto out;
1125         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1126         if (err)
1127                 goto out;
1128         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1129         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1130         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1131
1132         if ((adev->asic_type != CHIP_STONEY) &&
1133             (adev->asic_type != CHIP_TOPAZ)) {
1134                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1135                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1136                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1137                         if (err == -ENOENT) {
1138                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1139                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1140                         }
1141                 } else {
1142                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144                 }
1145                 if (!err) {
1146                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1147                         if (err)
1148                                 goto out;
1149                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1150                                 adev->gfx.mec2_fw->data;
1151                         adev->gfx.mec2_fw_version =
1152                                 le32_to_cpu(cp_hdr->header.ucode_version);
1153                         adev->gfx.mec2_feature_version =
1154                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1155                 } else {
1156                         err = 0;
1157                         adev->gfx.mec2_fw = NULL;
1158                 }
1159         }
1160
1161         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1162         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1163         info->fw = adev->gfx.pfp_fw;
1164         header = (const struct common_firmware_header *)info->fw->data;
1165         adev->firmware.fw_size +=
1166                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1167
1168         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1169         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1170         info->fw = adev->gfx.me_fw;
1171         header = (const struct common_firmware_header *)info->fw->data;
1172         adev->firmware.fw_size +=
1173                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1174
1175         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1176         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1177         info->fw = adev->gfx.ce_fw;
1178         header = (const struct common_firmware_header *)info->fw->data;
1179         adev->firmware.fw_size +=
1180                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1181
1182         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1183         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1184         info->fw = adev->gfx.rlc_fw;
1185         header = (const struct common_firmware_header *)info->fw->data;
1186         adev->firmware.fw_size +=
1187                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1188
1189         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1190         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1191         info->fw = adev->gfx.mec_fw;
1192         header = (const struct common_firmware_header *)info->fw->data;
1193         adev->firmware.fw_size +=
1194                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1195
1196         /* we need account JT in */
1197         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1198         adev->firmware.fw_size +=
1199                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1200
1201         if (amdgpu_sriov_vf(adev)) {
1202                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1203                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1204                 info->fw = adev->gfx.mec_fw;
1205                 adev->firmware.fw_size +=
1206                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1207         }
1208
1209         if (adev->gfx.mec2_fw) {
1210                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1211                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1212                 info->fw = adev->gfx.mec2_fw;
1213                 header = (const struct common_firmware_header *)info->fw->data;
1214                 adev->firmware.fw_size +=
1215                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1216         }
1217
1218 out:
1219         if (err) {
1220                 dev_err(adev->dev,
1221                         "gfx8: Failed to load firmware \"%s\"\n",
1222                         fw_name);
1223                 release_firmware(adev->gfx.pfp_fw);
1224                 adev->gfx.pfp_fw = NULL;
1225                 release_firmware(adev->gfx.me_fw);
1226                 adev->gfx.me_fw = NULL;
1227                 release_firmware(adev->gfx.ce_fw);
1228                 adev->gfx.ce_fw = NULL;
1229                 release_firmware(adev->gfx.rlc_fw);
1230                 adev->gfx.rlc_fw = NULL;
1231                 release_firmware(adev->gfx.mec_fw);
1232                 adev->gfx.mec_fw = NULL;
1233                 release_firmware(adev->gfx.mec2_fw);
1234                 adev->gfx.mec2_fw = NULL;
1235         }
1236         return err;
1237 }
1238
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer (CSB) contents
 * @adev: amdgpu device
 * @buffer: destination buffer (little-endian PM4 dwords)
 *
 * Builds the PM4 clear-state sequence from adev->gfx.rlc.cs_data:
 * preamble begin, a CONTEXT_CONTROL packet, every SECT_CONTEXT extent as
 * SET_CONTEXT_REG packets, the raster-config register pair, preamble end
 * and a final CLEAR_STATE packet.  Returns silently if cs_data or the
 * buffer is missing, or if a non-context section is encountered.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context registers belong in the CSB */
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1285
1286 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1287 {
1288         if (adev->asic_type == CHIP_CARRIZO)
1289                 return 5;
1290         else
1291                 return 4;
1292 }
1293
1294 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1295 {
1296         const struct cs_section_def *cs_data;
1297         int r;
1298
1299         adev->gfx.rlc.cs_data = vi_cs_data;
1300
1301         cs_data = adev->gfx.rlc.cs_data;
1302
1303         if (cs_data) {
1304                 /* init clear state block */
1305                 r = amdgpu_gfx_rlc_init_csb(adev);
1306                 if (r)
1307                         return r;
1308         }
1309
1310         if ((adev->asic_type == CHIP_CARRIZO) ||
1311             (adev->asic_type == CHIP_STONEY)) {
1312                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1313                 r = amdgpu_gfx_rlc_init_cpt(adev);
1314                 if (r)
1315                         return r;
1316         }
1317
1318         return 0;
1319 }
1320
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1325
1326 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1327 {
1328         int r;
1329         u32 *hpd;
1330         size_t mec_hpd_size;
1331
1332         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1333
1334         /* take ownership of the relevant compute queues */
1335         amdgpu_gfx_compute_queue_acquire(adev);
1336
1337         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1338
1339         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1340                                       AMDGPU_GEM_DOMAIN_VRAM,
1341                                       &adev->gfx.mec.hpd_eop_obj,
1342                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1343                                       (void **)&hpd);
1344         if (r) {
1345                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1346                 return r;
1347         }
1348
1349         memset(hpd, 0, mec_hpd_size);
1350
1351         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1352         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1353
1354         return 0;
1355 }
1356
/*
 * Raw machine words of a hand-assembled compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the VGPR file.
 * NOTE(review): appears to be GCN3 ISA initializing a run of VGPRs and
 * ending with a barrier/endpgm pair -- confirm against the ISA manual
 * before modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};

/*
 * Companion shader touching the SGPR file; used for both SGPR passes of
 * the EDC workaround (the second pass re-uses the same binary).
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};

/*
 * Register/value pairs (consumed two at a time) emitted as SET_SH_REG
 * packets before the VGPR-init dispatch.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * Register/value pairs for the first SGPR-init dispatch (SE mask 0x0f,
 * i.e. the low CUs -- presumably the first half; confirm against the
 * STATIC_THREAD_MGMT field layout).
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * Register/value pairs for the second SGPR-init dispatch (SE mask 0xf0,
 * complementary to sgpr1_init_regs above).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * EDC SEC/DED error counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each one back once to clear the counters after enabling EDC.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1510
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime the GPR files before enabling EDC
 *
 * Builds a single indirect buffer that dispatches three tiny compute
 * shaders (one VGPR pass, two complementary SGPR passes), waits for it to
 * finish, then programs GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back all
 * SEC/DED counter registers to clear them.
 *
 * Carrizo only; returns 0 immediately on other ASICs or when the first
 * compute ring is not ready.  Returns a negative errno on IB allocation,
 * submission or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->sched.ready)
                return 0;

        /* save GB_EDC_MODE and disable EDC while the init shaders run */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /*
         * IB size per pass: 3 dwords for each SET_SH_REG reg/value pair,
         * + 4 for PGM_LO/HI, + 5 for the dispatch, + 2 for the CS partial
         * flush event -- matching the packets emitted below.
         */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        /* shader base addresses are programmed as addr >> 8, hence 256 align */
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* second SGPR pass re-uses the same shader at sgpr_offset */
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* re-enable EDC with DED halt mode and FED propagation */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        /*
         * NOTE(review): clears DIS_EDC then ORs in bit 0 -- if DIS_EDC is
         * bit 0 this re-sets the field just cleared; verify the intended
         * CC_GC_EDC_CONFIG value against the register spec.
         */
        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1673
1674 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1675 {
1676         u32 gb_addr_config;
1677         u32 mc_shared_chmap, mc_arb_ramcfg;
1678         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1679         u32 tmp;
1680         int ret;
1681
1682         switch (adev->asic_type) {
1683         case CHIP_TOPAZ:
1684                 adev->gfx.config.max_shader_engines = 1;
1685                 adev->gfx.config.max_tile_pipes = 2;
1686                 adev->gfx.config.max_cu_per_sh = 6;
1687                 adev->gfx.config.max_sh_per_se = 1;
1688                 adev->gfx.config.max_backends_per_se = 2;
1689                 adev->gfx.config.max_texture_channel_caches = 2;
1690                 adev->gfx.config.max_gprs = 256;
1691                 adev->gfx.config.max_gs_threads = 32;
1692                 adev->gfx.config.max_hw_contexts = 8;
1693
1694                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1695                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1696                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1697                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1698                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1699                 break;
1700         case CHIP_FIJI:
1701                 adev->gfx.config.max_shader_engines = 4;
1702                 adev->gfx.config.max_tile_pipes = 16;
1703                 adev->gfx.config.max_cu_per_sh = 16;
1704                 adev->gfx.config.max_sh_per_se = 1;
1705                 adev->gfx.config.max_backends_per_se = 4;
1706                 adev->gfx.config.max_texture_channel_caches = 16;
1707                 adev->gfx.config.max_gprs = 256;
1708                 adev->gfx.config.max_gs_threads = 32;
1709                 adev->gfx.config.max_hw_contexts = 8;
1710
1711                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1712                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1713                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1714                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1715                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1716                 break;
1717         case CHIP_POLARIS11:
1718         case CHIP_POLARIS12:
1719                 ret = amdgpu_atombios_get_gfx_info(adev);
1720                 if (ret)
1721                         return ret;
1722                 adev->gfx.config.max_gprs = 256;
1723                 adev->gfx.config.max_gs_threads = 32;
1724                 adev->gfx.config.max_hw_contexts = 8;
1725
1726                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1727                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1728                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1729                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1730                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1731                 break;
1732         case CHIP_POLARIS10:
1733         case CHIP_VEGAM:
1734                 ret = amdgpu_atombios_get_gfx_info(adev);
1735                 if (ret)
1736                         return ret;
1737                 adev->gfx.config.max_gprs = 256;
1738                 adev->gfx.config.max_gs_threads = 32;
1739                 adev->gfx.config.max_hw_contexts = 8;
1740
1741                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1742                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1743                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1744                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1745                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1746                 break;
1747         case CHIP_TONGA:
1748                 adev->gfx.config.max_shader_engines = 4;
1749                 adev->gfx.config.max_tile_pipes = 8;
1750                 adev->gfx.config.max_cu_per_sh = 8;
1751                 adev->gfx.config.max_sh_per_se = 1;
1752                 adev->gfx.config.max_backends_per_se = 2;
1753                 adev->gfx.config.max_texture_channel_caches = 8;
1754                 adev->gfx.config.max_gprs = 256;
1755                 adev->gfx.config.max_gs_threads = 32;
1756                 adev->gfx.config.max_hw_contexts = 8;
1757
1758                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1759                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1760                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1761                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1762                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1763                 break;
1764         case CHIP_CARRIZO:
1765                 adev->gfx.config.max_shader_engines = 1;
1766                 adev->gfx.config.max_tile_pipes = 2;
1767                 adev->gfx.config.max_sh_per_se = 1;
1768                 adev->gfx.config.max_backends_per_se = 2;
1769                 adev->gfx.config.max_cu_per_sh = 8;
1770                 adev->gfx.config.max_texture_channel_caches = 2;
1771                 adev->gfx.config.max_gprs = 256;
1772                 adev->gfx.config.max_gs_threads = 32;
1773                 adev->gfx.config.max_hw_contexts = 8;
1774
1775                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1776                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1777                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1778                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1779                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1780                 break;
1781         case CHIP_STONEY:
1782                 adev->gfx.config.max_shader_engines = 1;
1783                 adev->gfx.config.max_tile_pipes = 2;
1784                 adev->gfx.config.max_sh_per_se = 1;
1785                 adev->gfx.config.max_backends_per_se = 1;
1786                 adev->gfx.config.max_cu_per_sh = 3;
1787                 adev->gfx.config.max_texture_channel_caches = 2;
1788                 adev->gfx.config.max_gprs = 256;
1789                 adev->gfx.config.max_gs_threads = 16;
1790                 adev->gfx.config.max_hw_contexts = 8;
1791
1792                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1793                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1794                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1795                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1796                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1797                 break;
1798         default:
1799                 adev->gfx.config.max_shader_engines = 2;
1800                 adev->gfx.config.max_tile_pipes = 4;
1801                 adev->gfx.config.max_cu_per_sh = 2;
1802                 adev->gfx.config.max_sh_per_se = 1;
1803                 adev->gfx.config.max_backends_per_se = 2;
1804                 adev->gfx.config.max_texture_channel_caches = 4;
1805                 adev->gfx.config.max_gprs = 256;
1806                 adev->gfx.config.max_gs_threads = 32;
1807                 adev->gfx.config.max_hw_contexts = 8;
1808
1809                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1810                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1811                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1812                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1813                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1814                 break;
1815         }
1816
1817         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1818         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1819         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1820
1821         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1822         adev->gfx.config.mem_max_burst_length_bytes = 256;
1823         if (adev->flags & AMD_IS_APU) {
1824                 /* Get memory bank mapping mode. */
1825                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1826                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1827                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1828
1829                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1830                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1831                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1832
1833                 /* Validate settings in case only one DIMM installed. */
1834                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1835                         dimm00_addr_map = 0;
1836                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1837                         dimm01_addr_map = 0;
1838                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1839                         dimm10_addr_map = 0;
1840                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1841                         dimm11_addr_map = 0;
1842
1843                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1844                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1845                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1846                         adev->gfx.config.mem_row_size_in_kb = 2;
1847                 else
1848                         adev->gfx.config.mem_row_size_in_kb = 1;
1849         } else {
1850                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1851                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1852                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1853                         adev->gfx.config.mem_row_size_in_kb = 4;
1854         }
1855
1856         adev->gfx.config.shader_engine_tile_size = 32;
1857         adev->gfx.config.num_gpus = 1;
1858         adev->gfx.config.multi_gpu_tile_size = 64;
1859
1860         /* fix up row size */
1861         switch (adev->gfx.config.mem_row_size_in_kb) {
1862         case 1:
1863         default:
1864                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1865                 break;
1866         case 2:
1867                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1868                 break;
1869         case 4:
1870                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1871                 break;
1872         }
1873         adev->gfx.config.gb_addr_config = gb_addr_config;
1874
1875         return 0;
1876 }
1877
1878 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1879                                         int mec, int pipe, int queue)
1880 {
1881         int r;
1882         unsigned irq_type;
1883         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1884
1885         ring = &adev->gfx.compute_ring[ring_id];
1886
1887         /* mec0 is me1 */
1888         ring->me = mec + 1;
1889         ring->pipe = pipe;
1890         ring->queue = queue;
1891
1892         ring->ring_obj = NULL;
1893         ring->use_doorbell = true;
1894         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1895         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1896                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1897         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1898
1899         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1900                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1901                 + ring->pipe;
1902
1903         /* type-2 packets are deprecated on MEC, use type-3 instead */
1904         r = amdgpu_ring_init(adev, ring, 1024,
1905                         &adev->gfx.eop_irq, irq_type);
1906         if (r)
1907                 return r;
1908
1909
1910         return 0;
1911 }
1912
1913 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1914
/*
 * gfx_v8_0_sw_init - software init for the gfx v8 IP block
 *
 * In order: picks the MEC topology per ASIC, registers the EOP,
 * privileged-reg/inst, CP ECC and SQ interrupt sources, loads microcode,
 * allocates RLC and MEC buffer objects, creates the gfx rings and every
 * enabled compute queue, then sets up KIQ and the per-queue MQDs, and
 * finally derives the gfx configuration.
 *
 * @handle: amdgpu_device pointer cast to void* (IP-block convention).
 * Returns 0 on success, negative errno on the first failing step.
 */
static int gfx_v8_0_sw_init(void *handle)
{
        int i, j, k, r, ring_id;
        struct amdgpu_ring *ring;
        struct amdgpu_kiq *kiq;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* bigger parts get a second MEC; Topaz/Stoney make do with one */
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_CARRIZO:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_VEGAM:
                adev->gfx.mec.num_mec = 2;
                break;
        case CHIP_TOPAZ:
        case CHIP_STONEY:
        default:
                adev->gfx.mec.num_mec = 1;
                break;
        }

        adev->gfx.mec.num_pipe_per_mec = 4;
        adev->gfx.mec.num_queue_per_pipe = 8;

        /* EOP Event */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
        if (r)
                return r;

        /* Privileged reg */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
                              &adev->gfx.priv_reg_irq);
        if (r)
                return r;

        /* Privileged inst */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
                              &adev->gfx.priv_inst_irq);
        if (r)
                return r;

        /* Add CP EDC/ECC irq  */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
                              &adev->gfx.cp_ecc_error_irq);
        if (r)
                return r;

        /* SQ interrupts. */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
                              &adev->gfx.sq_irq);
        if (r) {
                DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
                return r;
        }

        /* SQ interrupt handling is deferred to a work item */
        INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

        gfx_v8_0_scratch_init(adev);

        r = gfx_v8_0_init_microcode(adev);
        if (r) {
                DRM_ERROR("Failed to load gfx firmware!\n");
                return r;
        }

        r = adev->gfx.rlc.funcs->init(adev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        r = gfx_v8_0_mec_init(adev);
        if (r) {
                DRM_ERROR("Failed to init MEC BOs!\n");
                return r;
        }

        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
                sprintf(ring->name, "gfx");
                /* no gfx doorbells on iceland */
                if (adev->asic_type != CHIP_TOPAZ) {
                        ring->use_doorbell = true;
                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
                }

                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     AMDGPU_CP_IRQ_GFX_EOP);
                if (r)
                        return r;
        }


        /* set up the compute queues - allocate horizontally across pipes */
        ring_id = 0;
        for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
                for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
                        for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
                                /* skip queues not owned by the driver */
                                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
                                        continue;

                                r = gfx_v8_0_compute_ring_init(adev,
                                                                ring_id,
                                                                i, k, j);
                                if (r)
                                        return r;

                                ring_id++;
                        }
                }
        }

        r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
        if (r) {
                DRM_ERROR("Failed to init KIQ BOs!\n");
                return r;
        }

        kiq = &adev->gfx.kiq;
        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
        if (r)
                return r;

        /* create MQD for all compute queues as well as KIQ for SRIOV case */
        r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
        if (r)
                return r;

        adev->gfx.ce_ram_size = 0x8000;

        r = gfx_v8_0_gpu_early_init(adev);
        if (r)
                return r;

        return 0;
}
2058
2059 static int gfx_v8_0_sw_fini(void *handle)
2060 {
2061         int i;
2062         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2063
2064         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2065         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2066         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2067
2068         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2069                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2070         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2071                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2072
2073         amdgpu_gfx_compute_mqd_sw_fini(adev);
2074         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2075         amdgpu_gfx_kiq_fini(adev);
2076
2077         gfx_v8_0_mec_fini(adev);
2078         amdgpu_gfx_rlc_fini(adev);
2079         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2080                                 &adev->gfx.rlc.clear_state_gpu_addr,
2081                                 (void **)&adev->gfx.rlc.cs_ptr);
2082         if ((adev->asic_type == CHIP_CARRIZO) ||
2083             (adev->asic_type == CHIP_STONEY)) {
2084                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2085                                 &adev->gfx.rlc.cp_table_gpu_addr,
2086                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2087         }
2088         gfx_v8_0_free_microcode(adev);
2089
2090         return 0;
2091 }
2092
2093 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2094 {
2095         uint32_t *modearray, *mod2array;
2096         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2097         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2098         u32 reg_offset;
2099
2100         modearray = adev->gfx.config.tile_mode_array;
2101         mod2array = adev->gfx.config.macrotile_mode_array;
2102
2103         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2104                 modearray[reg_offset] = 0;
2105
2106         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2107                 mod2array[reg_offset] = 0;
2108
2109         switch (adev->asic_type) {
2110         case CHIP_TOPAZ:
2111                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                 PIPE_CONFIG(ADDR_SURF_P2) |
2113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2115                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116                                 PIPE_CONFIG(ADDR_SURF_P2) |
2117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2119                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120                                 PIPE_CONFIG(ADDR_SURF_P2) |
2121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2123                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                 PIPE_CONFIG(ADDR_SURF_P2) |
2125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2127                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128                                 PIPE_CONFIG(ADDR_SURF_P2) |
2129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2132                                 PIPE_CONFIG(ADDR_SURF_P2) |
2133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2136                                 PIPE_CONFIG(ADDR_SURF_P2) |
2137                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2140                                 PIPE_CONFIG(ADDR_SURF_P2));
2141                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142                                 PIPE_CONFIG(ADDR_SURF_P2) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2144                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146                                  PIPE_CONFIG(ADDR_SURF_P2) |
2147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150                                  PIPE_CONFIG(ADDR_SURF_P2) |
2151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2153                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2154                                  PIPE_CONFIG(ADDR_SURF_P2) |
2155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2157                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158                                  PIPE_CONFIG(ADDR_SURF_P2) |
2159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2162                                  PIPE_CONFIG(ADDR_SURF_P2) |
2163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166                                  PIPE_CONFIG(ADDR_SURF_P2) |
2167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2169                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2170                                  PIPE_CONFIG(ADDR_SURF_P2) |
2171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2173                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2174                                  PIPE_CONFIG(ADDR_SURF_P2) |
2175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2177                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2185                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2213
2214                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2217                                 NUM_BANKS(ADDR_SURF_8_BANK));
2218                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2221                                 NUM_BANKS(ADDR_SURF_8_BANK));
2222                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2225                                 NUM_BANKS(ADDR_SURF_8_BANK));
2226                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2229                                 NUM_BANKS(ADDR_SURF_8_BANK));
2230                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233                                 NUM_BANKS(ADDR_SURF_8_BANK));
2234                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2237                                 NUM_BANKS(ADDR_SURF_8_BANK));
2238                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241                                 NUM_BANKS(ADDR_SURF_8_BANK));
2242                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245                                 NUM_BANKS(ADDR_SURF_16_BANK));
2246                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249                                 NUM_BANKS(ADDR_SURF_16_BANK));
2250                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253                                  NUM_BANKS(ADDR_SURF_16_BANK));
2254                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2257                                  NUM_BANKS(ADDR_SURF_16_BANK));
2258                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261                                  NUM_BANKS(ADDR_SURF_16_BANK));
2262                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265                                  NUM_BANKS(ADDR_SURF_16_BANK));
2266                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                  NUM_BANKS(ADDR_SURF_8_BANK));
2270
2271                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2272                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2273                             reg_offset != 23)
2274                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2275
2276                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2277                         if (reg_offset != 7)
2278                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2279
2280                 break;
2281         case CHIP_FIJI:
2282         case CHIP_VEGAM:
2283                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2307                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2308                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2312                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2313                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2317                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2320                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2334                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2341                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2342                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2349                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2353                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2354                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2373                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2374                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2377                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2401                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2402                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2405
2406                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2409                                 NUM_BANKS(ADDR_SURF_8_BANK));
2410                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2413                                 NUM_BANKS(ADDR_SURF_8_BANK));
2414                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417                                 NUM_BANKS(ADDR_SURF_8_BANK));
2418                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2420                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2421                                 NUM_BANKS(ADDR_SURF_8_BANK));
2422                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2424                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425                                 NUM_BANKS(ADDR_SURF_8_BANK));
2426                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2429                                 NUM_BANKS(ADDR_SURF_8_BANK));
2430                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2433                                 NUM_BANKS(ADDR_SURF_8_BANK));
2434                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2436                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437                                 NUM_BANKS(ADDR_SURF_8_BANK));
2438                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2444                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2445                                  NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449                                  NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                  NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                  NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461                                  NUM_BANKS(ADDR_SURF_4_BANK));
2462
2463                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2464                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2465
2466                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2467                         if (reg_offset != 7)
2468                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2469
2470                 break;
2471         case CHIP_TONGA:
                /*
                 * Tonga tiling tables.  All entries use the
                 * ADDR_SURF_P8_32x32_16x16 pipe config except the PRT
                 * fallback entries (7, 12, 17, 23, 30), which drop to
                 * ADDR_SURF_P4_16x16.
                 *
                 * modearray[0..7]: depth micro-tiled modes, graded by
                 * TILE_SPLIT from 64B up to 2KB.
                 */
2472                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2473                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2475                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2476                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2479                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2483                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2487                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2488                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2491                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2492                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2495                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2496                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2499                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2501                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2502                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2503                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* modearray[8]: linear-aligned; modearray[9..12]: display micro-tiling. */
2504                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2506                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2507                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2509                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2513                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2514                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2518                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* modearray[13..18], [24]: thin micro-tiling (color). */
2522                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2525                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2529                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2531                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2538                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2541                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2542                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2543                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* modearray[19..26]: thick/xthick 3D-texture modes, single sample. */
2546                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2550                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2562                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2563                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2566                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* modearray[27..30]: rotated micro-tiling variants. */
2578                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2590                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2591                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2594
                /*
                 * Macrotile (bank) settings.  Index 7 is deliberately
                 * left unprogrammed here, matching the reg_offset != 7
                 * skip in the write loop below.
                 */
2595                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2597                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2598                                 NUM_BANKS(ADDR_SURF_16_BANK));
2599                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2601                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2602                                 NUM_BANKS(ADDR_SURF_16_BANK));
2603                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606                                 NUM_BANKS(ADDR_SURF_16_BANK));
2607                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2633                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2634                                  NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2637                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2638                                  NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                  NUM_BANKS(ADDR_SURF_8_BANK));
2643                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646                                  NUM_BANKS(ADDR_SURF_4_BANK));
2647                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650                                  NUM_BANKS(ADDR_SURF_4_BANK));
2651
                /* Flush both tables into the GB_TILE_MODE*/GB_MACROTILE_MODE* registers. */
2652                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2653                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2654
                /* Skip macrotile index 7 — mod2array[7] was never set above. */
2655                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2656                         if (reg_offset != 7)
2657                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2658
2659                 break;
2660         case CHIP_POLARIS11:
2661         case CHIP_POLARIS12:
                /*
                 * Polaris11/12 tiling tables.  Unlike the Tonga case
                 * above, every entry here uses the ADDR_SURF_P4_16x16
                 * pipe config (no P8 variants).
                 *
                 * modearray[0..7]: depth micro-tiled modes, graded by
                 * TILE_SPLIT from 64B up to 2KB.
                 */
2662                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2665                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2666                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2669                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2670                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2673                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2674                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2678                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2682                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2683                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2687                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2690                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2691                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* modearray[8]: linear-aligned; modearray[9..12]: display micro-tiling. */
2694                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2695                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2696                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2707                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2708                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* modearray[13..18], [24]: thin micro-tiling (color). */
2712                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2713                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2715                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2719                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2721                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2723                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2725                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2727                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2728                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2729                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2732                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2733                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2735                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* modearray[19..26]: thick/xthick 3D-texture modes, single sample. */
2736                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2740                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2744                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2748                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2752                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2756                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2760                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2764                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* modearray[27..30]: rotated micro-tiling variants. */
2768                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2776                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2780                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2784
                /*
                 * Macrotile (bank) settings.  Index 7 is deliberately
                 * left unprogrammed here, matching the reg_offset != 7
                 * skip in the write loop below.
                 */
2785                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2787                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2788                                 NUM_BANKS(ADDR_SURF_16_BANK));
2789
2790                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2792                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793                                 NUM_BANKS(ADDR_SURF_16_BANK));
2794
2795                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2797                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2798                                 NUM_BANKS(ADDR_SURF_16_BANK));
2799
2800                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2801                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2802                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803                                 NUM_BANKS(ADDR_SURF_16_BANK));
2804
2805                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2806                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2807                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2808                                 NUM_BANKS(ADDR_SURF_16_BANK));
2809
2810                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2812                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2813                                 NUM_BANKS(ADDR_SURF_16_BANK));
2814
2815                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2817                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2818                                 NUM_BANKS(ADDR_SURF_16_BANK));
2819
2820                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2821                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2822                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2823                                 NUM_BANKS(ADDR_SURF_16_BANK));
2824
2825                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829
2830                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2833                                 NUM_BANKS(ADDR_SURF_16_BANK));
2834
2835                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2837                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2838                                 NUM_BANKS(ADDR_SURF_16_BANK));
2839
2840                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844
2845                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2848                                 NUM_BANKS(ADDR_SURF_8_BANK));
2849
2850                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2853                                 NUM_BANKS(ADDR_SURF_4_BANK));
2854
                /* Flush both tables into the GB_TILE_MODE*/GB_MACROTILE_MODE* registers. */
2855                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2856                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2857
                /* Skip macrotile index 7 — mod2array[7] was never set above. */
2858                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2859                         if (reg_offset != 7)
2860                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2861
2862                 break;
2863         case CHIP_POLARIS10:
2864                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2866                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2867                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2868                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2871                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2872                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2875                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2876                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2880                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2882                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2883                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2884                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2887                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2888                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2892                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2896                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2897                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2898                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2899                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2901                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2905                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2906                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2909                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2910                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2911                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2913                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2914                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2917                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2918                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2922                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2923                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2926                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2927                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2930                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2931                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2934                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2938                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2942                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2946                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2947                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2950                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2954                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2955                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2958                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2962                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2963                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2966                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2967                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2978                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2982                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2986
2987                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000                                 NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3004                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005                                 NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3015                                 NUM_BANKS(ADDR_SURF_16_BANK));
3016
3017                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3020                                 NUM_BANKS(ADDR_SURF_16_BANK));
3021
3022                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3024                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025                                 NUM_BANKS(ADDR_SURF_16_BANK));
3026
3027                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030                                 NUM_BANKS(ADDR_SURF_16_BANK));
3031
3032                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3034                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3035                                 NUM_BANKS(ADDR_SURF_16_BANK));
3036
3037                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3038                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3039                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3040                                 NUM_BANKS(ADDR_SURF_16_BANK));
3041
3042                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3044                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3045                                 NUM_BANKS(ADDR_SURF_8_BANK));
3046
3047                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3049                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3050                                 NUM_BANKS(ADDR_SURF_4_BANK));
3051
3052                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3055                                 NUM_BANKS(ADDR_SURF_4_BANK));
3056
                     /*
                      * Commit the tables to hardware: one 32-bit mode word per
                      * GB_TILE_MODEn / GB_MACROTILE_MODEn register.  Every
                      * tile-mode entry is written, but macrotile offset 7 is
                      * skipped because mod2array[7] is never initialised above
                      * (the assignments jump from index 6 to index 8).
                      */
3057                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3058                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3059
3060                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3061                         if (reg_offset != 7)
3062                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3063
3064                 break;
3065         case CHIP_STONEY:
3066                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3067                                 PIPE_CONFIG(ADDR_SURF_P2) |
3068                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3069                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3070                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3071                                 PIPE_CONFIG(ADDR_SURF_P2) |
3072                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3073                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3074                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3075                                 PIPE_CONFIG(ADDR_SURF_P2) |
3076                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3077                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3078                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3079                                 PIPE_CONFIG(ADDR_SURF_P2) |
3080                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3081                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3082                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083                                 PIPE_CONFIG(ADDR_SURF_P2) |
3084                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3086                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3087                                 PIPE_CONFIG(ADDR_SURF_P2) |
3088                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3090                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3091                                 PIPE_CONFIG(ADDR_SURF_P2) |
3092                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3094                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3095                                 PIPE_CONFIG(ADDR_SURF_P2));
3096                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3097                                 PIPE_CONFIG(ADDR_SURF_P2) |
3098                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3099                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3100                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101                                  PIPE_CONFIG(ADDR_SURF_P2) |
3102                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3103                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3104                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3105                                  PIPE_CONFIG(ADDR_SURF_P2) |
3106                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3107                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3108                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3109                                  PIPE_CONFIG(ADDR_SURF_P2) |
3110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3112                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113                                  PIPE_CONFIG(ADDR_SURF_P2) |
3114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3116                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3117                                  PIPE_CONFIG(ADDR_SURF_P2) |
3118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3120                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3121                                  PIPE_CONFIG(ADDR_SURF_P2) |
3122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3124                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3125                                  PIPE_CONFIG(ADDR_SURF_P2) |
3126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3128                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3129                                  PIPE_CONFIG(ADDR_SURF_P2) |
3130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3132                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3133                                  PIPE_CONFIG(ADDR_SURF_P2) |
3134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3136                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3140                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3144                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3148                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3152                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3156                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3160                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3164                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3168
3169                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3172                                 NUM_BANKS(ADDR_SURF_8_BANK));
3173                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3176                                 NUM_BANKS(ADDR_SURF_8_BANK));
3177                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3179                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3180                                 NUM_BANKS(ADDR_SURF_8_BANK));
3181                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3182                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3183                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3184                                 NUM_BANKS(ADDR_SURF_8_BANK));
3185                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3188                                 NUM_BANKS(ADDR_SURF_8_BANK));
3189                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3192                                 NUM_BANKS(ADDR_SURF_8_BANK));
3193                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3194                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3195                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3196                                 NUM_BANKS(ADDR_SURF_8_BANK));
3197                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3198                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3199                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3200                                 NUM_BANKS(ADDR_SURF_16_BANK));
3201                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3204                                 NUM_BANKS(ADDR_SURF_16_BANK));
3205                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3206                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                  NUM_BANKS(ADDR_SURF_16_BANK));
3209                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3210                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                  NUM_BANKS(ADDR_SURF_16_BANK));
3213                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3215                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3216                                  NUM_BANKS(ADDR_SURF_16_BANK));
3217                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3220                                  NUM_BANKS(ADDR_SURF_16_BANK));
3221                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                  NUM_BANKS(ADDR_SURF_8_BANK));
3225
                     /*
                      * Commit the CHIP_STONEY tables.  Tile-mode offsets 7, 12,
                      * 17 and 23, and macrotile offset 7, are never initialised
                      * in the arrays above, so those register offsets are
                      * skipped rather than written with stale values.
                      */
3226                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3227                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3228                             reg_offset != 23)
3229                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3230
3231                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3232                         if (reg_offset != 7)
3233                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3234
3235                 break;
3236         default:
3237                 dev_warn(adev->dev,
3238                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3239                          adev->asic_type);
3240
                     /*
                      * Deliberate fall-through: an unrecognised ASIC gets the
                      * CHIP_CARRIZO tables as a safe default (warned above).
                      * The marker below keeps -Wimplicit-fallthrough quiet.
                      */
                     /* fall through */
3241         case CHIP_CARRIZO:
3242                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3243                                 PIPE_CONFIG(ADDR_SURF_P2) |
3244                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3246                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247                                 PIPE_CONFIG(ADDR_SURF_P2) |
3248                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3250                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263                                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2));
3272                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                 PIPE_CONFIG(ADDR_SURF_P2) |
3274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3275                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3276                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                  PIPE_CONFIG(ADDR_SURF_P2) |
3278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281                                  PIPE_CONFIG(ADDR_SURF_P2) |
3282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3284                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3292                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3300                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3304                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3308                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3336                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3340                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3344
3345                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                 NUM_BANKS(ADDR_SURF_8_BANK));
3349                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3350                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3351                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352                                 NUM_BANKS(ADDR_SURF_8_BANK));
3353                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3356                                 NUM_BANKS(ADDR_SURF_8_BANK));
3357                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360                                 NUM_BANKS(ADDR_SURF_8_BANK));
3361                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                 NUM_BANKS(ADDR_SURF_8_BANK));
3365                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                 NUM_BANKS(ADDR_SURF_8_BANK));
3369                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                 NUM_BANKS(ADDR_SURF_16_BANK));
3377                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380                                 NUM_BANKS(ADDR_SURF_16_BANK));
3381                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3382                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                  NUM_BANKS(ADDR_SURF_16_BANK));
3385                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3386                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                  NUM_BANKS(ADDR_SURF_16_BANK));
3389                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                  NUM_BANKS(ADDR_SURF_16_BANK));
3393                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                  NUM_BANKS(ADDR_SURF_16_BANK));
3397                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                  NUM_BANKS(ADDR_SURF_8_BANK));
3401
3402                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3403                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3404                             reg_offset != 23)
3405                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3406
3407                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3408                         if (reg_offset != 7)
3409                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3410
3411                 break;
3412         }
3413 }
3414
3415 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3416                                   u32 se_num, u32 sh_num, u32 instance)
3417 {
3418         u32 data;
3419
3420         if (instance == 0xffffffff)
3421                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3422         else
3423                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3424
3425         if (se_num == 0xffffffff)
3426                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3427         else
3428                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3429
3430         if (sh_num == 0xffffffff)
3431                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3432         else
3433                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3434
3435         WREG32(mmGRBM_GFX_INDEX, data);
3436 }
3437
/* Route subsequent SRBM-indexed register accesses to the given
 * ME/pipe/queue.  The trailing 0 is the VMID argument of
 * vi_srbm_select() (compare gfx_v8_0_init_compute_vmid(), which
 * iterates it over the compute VMIDs).
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3443
3444 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3445 {
3446         u32 data, mask;
3447
3448         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3449                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3450
3451         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3452
3453         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3454                                          adev->gfx.config.max_sh_per_se);
3455
3456         return (~data) & mask;
3457 }
3458
3459 static void
3460 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3461 {
3462         switch (adev->asic_type) {
3463         case CHIP_FIJI:
3464         case CHIP_VEGAM:
3465                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3466                           RB_XSEL2(1) | PKR_MAP(2) |
3467                           PKR_XSEL(1) | PKR_YSEL(1) |
3468                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3469                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3470                            SE_PAIR_YSEL(2);
3471                 break;
3472         case CHIP_TONGA:
3473         case CHIP_POLARIS10:
3474                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3475                           SE_XSEL(1) | SE_YSEL(1);
3476                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3477                            SE_PAIR_YSEL(2);
3478                 break;
3479         case CHIP_TOPAZ:
3480         case CHIP_CARRIZO:
3481                 *rconf |= RB_MAP_PKR0(2);
3482                 *rconf1 |= 0x0;
3483                 break;
3484         case CHIP_POLARIS11:
3485         case CHIP_POLARIS12:
3486                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3487                           SE_XSEL(1) | SE_YSEL(1);
3488                 *rconf1 |= 0x0;
3489                 break;
3490         case CHIP_STONEY:
3491                 *rconf |= 0x0;
3492                 *rconf1 |= 0x0;
3493                 break;
3494         default:
3495                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3496                 break;
3497         }
3498 }
3499
/*
 * gfx_v8_0_write_harvested_raster_configs - adapt the raster config to
 * harvested (fused-off) render backends.
 *
 * @adev: amdgpu_device pointer
 * @raster_config: PA_SC_RASTER_CONFIG value computed for a full chip
 * @raster_config_1: PA_SC_RASTER_CONFIG_1 value computed for a full chip
 * @rb_mask: bitmap of the RBs that are actually active
 * @num_rb: number of RB pipes the config was sized for
 *
 * When some RBs are missing, the SE/PKR/RB mapping fields must be
 * patched per shader engine so rasterized work is not steered to an
 * absent backend.  Each SE's adjusted value is written with that SE
 * selected through GRBM_GFX_INDEX; broadcast is restored on return.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into consecutive per-SE masks (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair has no RBs at all, remap the SE-pair field to
	 * the surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx addresses this SE's pair partner in se_mask[] */
		int idx = (se / 2) * 2;

		/* One SE of the pair is empty: steer SE mapping to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same treatment for an empty packer within this SE. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally patch the RB mapping inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3608
/*
 * gfx_v8_0_setup_rb - program the render backend raster configuration.
 *
 * Collects the active-RB bitmap of every SE/SH, derives the ideal
 * raster config for this ASIC, and either broadcasts it directly (all
 * RBs present, or none enabled) or goes through the harvested path.
 * The final per-SE register values are cached in
 * adev->gfx.config.rb_config for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* accumulate each SH's bitmap into one chip-wide bitmap */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* nothing harvested (or nothing enabled): write as-is */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3665
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs' memory apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers (config, bases, APE1) for each
 * compute VMID.
 */
3674 #define DEFAULT_SH_MEM_BASES    (0x6000)
3675 #define FIRST_COMPUTE_VMID      (8)
3676 #define LAST_COMPUTE_VMID       (16)
3677 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3678 {
3679         int i;
3680         uint32_t sh_mem_config;
3681         uint32_t sh_mem_bases;
3682
3683         /*
3684          * Configure apertures:
3685          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3686          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3687          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3688          */
3689         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3690
3691         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3692                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3693                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3694                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3695                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3696                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3697
3698         mutex_lock(&adev->srbm_mutex);
3699         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3700                 vi_srbm_select(adev, 0, 0, 0, i);
3701                 /* CP and shaders */
3702                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3703                 WREG32(mmSH_MEM_APE1_BASE, 1);
3704                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3705                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3706         }
3707         vi_srbm_select(adev, 0, 0, 0, 0);
3708         mutex_unlock(&adev->srbm_mutex);
3709 }
3710
3711 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3712 {
3713         switch (adev->asic_type) {
3714         default:
3715                 adev->gfx.config.double_offchip_lds_buf = 1;
3716                 break;
3717         case CHIP_CARRIZO:
3718         case CHIP_STONEY:
3719                 adev->gfx.config.double_offchip_lds_buf = 0;
3720                 break;
3721         }
3722 }
3723
/*
 * gfx_v8_0_constants_init - program the static GFX state.
 *
 * Sets the addressing config, tiling tables, RB/CU setup, per-VMID
 * SH_MEM apertures, then broadcasts the SC FIFO sizes and SPI
 * arbitration priorities to all shader engines.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* the same addressing config is mirrored into GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default mtype, bases at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default mtype, shared
			 * aperture base taken from the GMC setup
			 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* give all four pipe-order timestamps equal (2) priority */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3805
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes traffic to drain.
 *
 * Polls the CU master busy register of every SE/SH (each with a
 * timeout of adev->usec_timeout microseconds), then polls the non-CU
 * masters (SE/GC/TC0/TC1) once globally.  Logs and bails out early if
 * a unit never reports idle.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast before dropping the lock */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the global (non-CU) serdes masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3843
3844 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3845                                                bool enable)
3846 {
3847         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3848
3849         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3850         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3851         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3852         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3853
3854         WREG32(mmCP_INT_CNTL_RING0, tmp);
3855 }
3856
/* Point the RLC at the clear-state indirect buffer (CSIB): 64-bit GPU
 * address split across HI/LO (low bits masked to 4-byte alignment)
 * plus its length.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3867
3868 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3869                                 int ind_offset,
3870                                 int list_size,
3871                                 int *unique_indices,
3872                                 int *indices_count,
3873                                 int max_indices,
3874                                 int *ind_start_offsets,
3875                                 int *offset_count,
3876                                 int max_offset)
3877 {
3878         int indices;
3879         bool new_entry = true;
3880
3881         for (; ind_offset < list_size; ind_offset++) {
3882
3883                 if (new_entry) {
3884                         new_entry = false;
3885                         ind_start_offsets[*offset_count] = ind_offset;
3886                         *offset_count = *offset_count + 1;
3887                         BUG_ON(*offset_count >= max_offset);
3888                 }
3889
3890                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3891                         new_entry = true;
3892                         continue;
3893                 }
3894
3895                 ind_offset += 2;
3896
3897                 /* look for the matching indice */
3898                 for (indices = 0;
3899                         indices < *indices_count;
3900                         indices++) {
3901                         if (unique_indices[indices] ==
3902                                 register_list_format[ind_offset])
3903                                 break;
3904                 }
3905
3906                 if (indices >= *indices_count) {
3907                         unique_indices[*indices_count] =
3908                                 register_list_format[ind_offset];
3909                         indices = *indices_count;
3910                         *indices_count = *indices_count + 1;
3911                         BUG_ON(*indices_count >= max_indices);
3912                 }
3913
3914                 register_list_format[ind_offset] = indices;
3915         }
3916 }
3917
3918 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3919 {
3920         int i, temp, data;
3921         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3922         int indices_count = 0;
3923         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3924         int offset_count = 0;
3925
3926         int list_size;
3927         unsigned int *register_list_format =
3928                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3929         if (!register_list_format)
3930                 return -ENOMEM;
3931         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3932                         adev->gfx.rlc.reg_list_format_size_bytes);
3933
3934         gfx_v8_0_parse_ind_reg_list(register_list_format,
3935                                 RLC_FormatDirectRegListLength,
3936                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3937                                 unique_indices,
3938                                 &indices_count,
3939                                 ARRAY_SIZE(unique_indices),
3940                                 indirect_start_offsets,
3941                                 &offset_count,
3942                                 ARRAY_SIZE(indirect_start_offsets));
3943
3944         /* save and restore list */
3945         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3946
3947         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3948         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3949                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3950
3951         /* indirect list */
3952         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3953         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3954                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3955
3956         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3957         list_size = list_size >> 1;
3958         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3959         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3960
3961         /* starting offsets starts */
3962         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3963                 adev->gfx.rlc.starting_offsets_start);
3964         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3965                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3966                                 indirect_start_offsets[i]);
3967
3968         /* unique indices */
3969         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3970         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3971         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3972                 if (unique_indices[i] != 0) {
3973                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3974                         WREG32(data + i, unique_indices[i] >> 20);
3975                 }
3976         }
3977         kfree(register_list_format);
3978
3979         return 0;
3980 }
3981
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3986
3987 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3988 {
3989         uint32_t data;
3990
3991         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3992
3993         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3994         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3995         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3996         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3997         WREG32(mmRLC_PG_DELAY, data);
3998
3999         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4000         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4001
4002 }
4003
/* Toggle the SMU clock slowdown-on-power-up bit in RLC_PG_CNTL. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4009
/* Toggle the SMU clock slowdown-on-power-down bit in RLC_PG_CNTL. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4015
/* Note the inverted polarity: the hardware field is a *disable* bit,
 * so enable == true clears CP_PG_DISABLE.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4020
4021 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4022 {
4023         if ((adev->asic_type == CHIP_CARRIZO) ||
4024             (adev->asic_type == CHIP_STONEY)) {
4025                 gfx_v8_0_init_csb(adev);
4026                 gfx_v8_0_init_save_restore_list(adev);
4027                 gfx_v8_0_enable_save_restore_machine(adev);
4028                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4029                 gfx_v8_0_init_power_gating(adev);
4030                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4031         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4032                    (adev->asic_type == CHIP_POLARIS12) ||
4033                    (adev->asic_type == CHIP_VEGAM)) {
4034                 gfx_v8_0_init_csb(adev);
4035                 gfx_v8_0_init_save_restore_list(adev);
4036                 gfx_v8_0_enable_save_restore_machine(adev);
4037                 gfx_v8_0_init_power_gating(adev);
4038         }
4039
4040 }
4041
/*
 * Halt the RLC microengine (RLC_CNTL.RLC_ENABLE_F32 = 0), mask the GUI
 * idle interrupt, and wait for RLC serdes traffic to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4049
/*
 * Pulse the RLC soft-reset bit: assert for 50us, deassert, then give
 * the block another 50us to come out of reset.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4058
/*
 * Re-enable the RLC microengine.  The GUI idle interrupt is only
 * re-enabled here on dGPUs; APUs enable it later (see comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4069
/*
 * Full RLC restart: stop, soft-reset, re-initialize power-gating
 * state, then start again.  Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4079
4080 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4081 {
4082         int i;
4083         u32 tmp = RREG32(mmCP_ME_CNTL);
4084
4085         if (enable) {
4086                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4087                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4088                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4089         } else {
4090                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4091                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4092                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4093                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4094                         adev->gfx.gfx_ring[i].sched.ready = false;
4095         }
4096         WREG32(mmCP_ME_CNTL, tmp);
4097         udelay(50);
4098 }
4099
4100 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4101 {
4102         u32 count = 0;
4103         const struct cs_section_def *sect = NULL;
4104         const struct cs_extent_def *ext = NULL;
4105
4106         /* begin clear state */
4107         count += 2;
4108         /* context control state */
4109         count += 3;
4110
4111         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4112                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4113                         if (sect->id == SECT_CONTEXT)
4114                                 count += 2 + ext->reg_count;
4115                         else
4116                                 return 0;
4117                 }
4118         }
4119         /* pa_sc_raster_config/pa_sc_raster_config1 */
4120         count += 4;
4121         /* end clear state */
4122         count += 2;
4123         /* clear state */
4124         count += 2;
4125
4126         return count;
4127 }
4128
/*
 * Bring up the gfx Command Processor: program basic CP config
 * registers, un-halt the CP, then submit the clear-state /
 * context-control preamble on gfx ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet emitted after the CSB contents */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent of the golden clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config taken from SE0/RB0 only */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring-buffer doorbell for @ring.  No-op on
 * Topaz; the doorbell range registers are only programmed on dGPUs.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		/* point the doorbell at this ring's slot and enable it */
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* doorbell range registers are dGPU-only */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4225
/*
 * (Re)program gfx ring 0: ring-buffer size and control, read/write
 * pointer writeback addresses, ring base and doorbell; then start the
 * ring via gfx_v8_0_cp_gfx_start() and run a ring test.
 *
 * Returns the ring-test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers
	 * (RB_RPTR_WR_ENA is set temporarily so the rptr can be forced)
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA now that pointers are set */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte aligned GPU address (>> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;
	r = amdgpu_ring_test_helper(ring);

	return r;
}
4281
4282 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4283 {
4284         int i;
4285
4286         if (enable) {
4287                 WREG32(mmCP_MEC_CNTL, 0);
4288         } else {
4289                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4290                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4291                         adev->gfx.compute_ring[i].sched.ready = false;
4292                 adev->gfx.kiq.ring.sched.ready = false;
4293         }
4294         udelay(50);
4295 }
4296
4297 /* KIQ functions */
/*
 * Tell the RLC which me/pipe/queue is the KIQ (RLC_CP_SCHEDULERS low
 * byte).  The value is deliberately written twice: first without bit 7,
 * then with bit 7 (0x80) set -- presumably a valid/enable bit that must
 * be raised after the queue id is latched; confirm against the VI RLC
 * programming guide.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4311
/*
 * Use the KIQ to map all compute queues: emit one SET_RESOURCES packet
 * carrying the bitmap of usable MEC queues (adev->gfx.mec.queue_bitmap),
 * then one MAP_QUEUES packet per compute ring, and finish with a KIQ
 * ring test so submission failures surface here.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the 64-bit queue mask from the MEC queue bitmap */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords of MAP_QUEUES per ring + 8 dwords of SET_RESOURCES */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ enable failed\n");
	return r;
}
4373
/*
 * Request an HQD dequeue (@req selects the dequeue request type) and
 * busy-wait up to adev->usec_timeout microseconds for the queue to go
 * inactive; then unconditionally clear the dequeue request and the PQ
 * read/write pointers.
 *
 * NOTE(review): operates on the currently selected HQD; callers appear
 * responsible for the vi_srbm_select() -- confirm at call sites.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4394
/*
 * Build the MQD (memory queue descriptor) for @ring from scratch:
 * static thread management masks, EOP buffer, MQD/HQD base addresses,
 * PQ control, writeback addresses, doorbell control and a set of
 * CP_HQD_* defaults read back from hardware.  Ends with
 * cp_hqd_active = 1 so the queue activates when the MQD is committed.
 *
 * NOTE(review): the RREG32(mmCP_HQD_*) reads target the currently
 * selected queue; callers appear to wrap this in vi_srbm_select() --
 * confirm at call sites.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* hardware-defined MQD header/format id for VI compute queues */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all threads on every shader engine by default */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* the dynamic CU mask lives in the same allocation as the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: capture current hardware values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4537
/**
 * gfx_v8_0_mqd_commit - program a prepared MQD into the HQD registers
 * @adev: amdgpu device
 * @mqd: MQD to commit
 *
 * Writes the CP_HQD_* register block of the currently selected queue
 * from @mqd, skipping the EOP RPTR/WPTR registers on Tonga (errata,
 * see below) and writing CP_MQD_BASE_ADDR..CP_HQD_ACTIVE last so the
 * queue only goes active once everything else is programmed.
 *
 * NOTE(review): callers appear responsible for holding the SRBM
 * selection (vi_srbm_select) for the target queue -- confirm.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4574
/*
 * Initialize (or, after a GPU reset, restore) the KIQ's own queue.
 * On reset the saved MQD backup is copied back, the ring is cleared
 * and the MQD re-committed; otherwise a fresh MQD is built, committed,
 * and saved as the backup for future resets.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup lives one slot past the compute-ring backups */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so a GPU reset can restore a clean MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4613
/*
 * Initialize a compute ring's MQD.  On first init the MQD is built
 * and backed up (it is committed later via the KIQ MAP_QUEUES path,
 * see gfx_v8_0_kiq_kcq_enable); on GPU reset the backup is restored
 * and the ring cleared; on suspend/resume only the ring is cleared.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so a GPU reset can restore a clean MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4644
/*
 * Program the MEC doorbell aperture (KIQ .. MEC_RING7) and enable CP
 * doorbells.  NOTE(review): "asic_type > CHIP_TONGA" relies on the
 * amd_asic_type enum ordering -- presumably excludes Topaz/Tonga and
 * earlier; verify against the enum definition.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4654
4655 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4656 {
4657         struct amdgpu_ring *ring;
4658         int r;
4659
4660         ring = &adev->gfx.kiq.ring;
4661
4662         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4663         if (unlikely(r != 0))
4664                 return r;
4665
4666         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4667         if (unlikely(r != 0))
4668                 return r;
4669
4670         gfx_v8_0_kiq_init_queue(ring);
4671         amdgpu_bo_kunmap(ring->mqd_obj);
4672         ring->mqd_ptr = NULL;
4673         amdgpu_bo_unreserve(ring->mqd_obj);
4674         ring->sched.ready = true;
4675         return 0;
4676 }
4677
4678 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4679 {
4680         struct amdgpu_ring *ring = NULL;
4681         int r = 0, i;
4682
4683         gfx_v8_0_cp_compute_enable(adev, true);
4684
4685         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4686                 ring = &adev->gfx.compute_ring[i];
4687
4688                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4689                 if (unlikely(r != 0))
4690                         goto done;
4691                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4692                 if (!r) {
4693                         r = gfx_v8_0_kcq_init_queue(ring);
4694                         amdgpu_bo_kunmap(ring->mqd_obj);
4695                         ring->mqd_ptr = NULL;
4696                 }
4697                 amdgpu_bo_unreserve(ring->mqd_obj);
4698                 if (r)
4699                         goto done;
4700         }
4701
4702         gfx_v8_0_set_mec_doorbell_range(adev);
4703
4704         r = gfx_v8_0_kiq_kcq_enable(adev);
4705         if (r)
4706                 goto done;
4707
4708         /* Test KCQs - reversing the order of rings seems to fix ring test failure
4709          * after GPU reset
4710          */
4711         for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
4712                 ring = &adev->gfx.compute_ring[i];
4713                 r = amdgpu_ring_test_helper(ring);
4714         }
4715
4716 done:
4717         return r;
4718 }
4719
/*
 * Resume all command processors: KIQ first (needed to map the KCQs),
 * then the gfx ring, then the compute rings.  The GUI idle interrupt
 * is masked during the sequence on dGPUs and re-enabled at the end.
 *
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;
	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4742
/* Enable/disable the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4748
/*
 * IP-block hw_init hook: program golden registers and GFX constants,
 * bring up the RLC, then resume the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
4765
4766 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4767 {
4768         int r, i;
4769         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4770
4771         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4772         if (r)
4773                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4774
4775         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4776                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4777
4778                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4779                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4780                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4781                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4782                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4783                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4784                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4785                 amdgpu_ring_write(kiq_ring, 0);
4786                 amdgpu_ring_write(kiq_ring, 0);
4787                 amdgpu_ring_write(kiq_ring, 0);
4788         }
4789         r = amdgpu_ring_test_helper(kiq_ring);
4790         if (r)
4791                 DRM_ERROR("KCQ disable failed\n");
4792
4793         return r;
4794 }
4795
4796 static bool gfx_v8_0_is_idle(void *handle)
4797 {
4798         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4799
4800         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4801                 || RREG32(mmGRBM_STATUS2) != 0x8)
4802                 return false;
4803         else
4804                 return true;
4805 }
4806
4807 static bool gfx_v8_0_rlc_is_idle(void *handle)
4808 {
4809         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4810
4811         if (RREG32(mmGRBM_STATUS2) != 0x8)
4812                 return false;
4813         else
4814                 return true;
4815 }
4816
4817 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4818 {
4819         unsigned int i;
4820         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4821
4822         for (i = 0; i < adev->usec_timeout; i++) {
4823                 if (gfx_v8_0_rlc_is_idle(handle))
4824                         return 0;
4825
4826                 udelay(1);
4827         }
4828         return -ETIMEDOUT;
4829 }
4830
4831 static int gfx_v8_0_wait_for_idle(void *handle)
4832 {
4833         unsigned int i;
4834         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4835
4836         for (i = 0; i < adev->usec_timeout; i++) {
4837                 if (gfx_v8_0_is_idle(handle))
4838                         return 0;
4839
4840                 udelay(1);
4841         }
4842         return -ETIMEDOUT;
4843 }
4844
/*
 * gfx_v8_0_hw_fini - hw_fini hook: tear down interrupts, queues, CP and RLC
 *
 * Always returns 0; busy CP/RLC are logged and skipped rather than
 * force-halted.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* drop the interrupt references taken in late_init */
        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        gfx_v8_0_kcq_disable(adev);

        /* under SR-IOV the host owns CP/RLC teardown */
        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        amdgpu_gfx_rlc_enter_safe_mode(adev);
        /* only halt CP/RLC once each has drained; halting a busy block
         * could wedge the teardown, so warn and skip instead
         */
        if (!gfx_v8_0_wait_for_idle(adev))
                gfx_v8_0_cp_enable(adev, false);
        else
                pr_err("cp is busy, skip halt cp\n");
        if (!gfx_v8_0_wait_for_rlc_idle(adev))
                adev->gfx.rlc.funcs->stop(adev);
        else
                pr_err("rlc is busy, skip halt rlc\n");
        amdgpu_gfx_rlc_exit_safe_mode(adev);
        return 0;
}
4875
/* Suspend is identical to hw_fini for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
4880
/* Resume is identical to hw_init for this IP block. */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
4885
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and translates any
 * busy/pending conditions into GRBM/SRBM soft-reset masks, cached in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks.
 *
 * Returns true when a reset is required, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(mmGRBM_STATUS);
        /* any busy graphics pipeline stage -> reset CP, GFX and GRBM */
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2 */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP micro engine (fetcher/compute/gfx) -> reset all three */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* cache the computed masks for the reset hooks */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
4947
4948 static int gfx_v8_0_pre_soft_reset(void *handle)
4949 {
4950         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4951         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4952
4953         if ((!adev->gfx.grbm_soft_reset) &&
4954             (!adev->gfx.srbm_soft_reset))
4955                 return 0;
4956
4957         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4958         srbm_soft_reset = adev->gfx.srbm_soft_reset;
4959
4960         /* stop the rlc */
4961         adev->gfx.rlc.funcs->stop(adev);
4962
4963         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4964             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4965                 /* Disable GFX parsing/prefetching */
4966                 gfx_v8_0_cp_gfx_enable(adev, false);
4967
4968         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4969             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4970             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4971             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4972                 int i;
4973
4974                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4975                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4976
4977                         mutex_lock(&adev->srbm_mutex);
4978                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4979                         gfx_v8_0_deactivate_hqd(adev, 2);
4980                         vi_srbm_select(adev, 0, 0, 0, 0);
4981                         mutex_unlock(&adev->srbm_mutex);
4982                 }
4983                 /* Disable MEC parsing/prefetching */
4984                 gfx_v8_0_cp_compute_enable(adev, false);
4985         }
4986
4987        return 0;
4988 }
4989
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset lines
 *
 * Stalls the GMCON fabric around the reset, asserts then deasserts the
 * cached GRBM/SRBM soft-reset masks with 50us settle delays, and
 * finally releases the stall.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* nothing to do if check_soft_reset found no busy blocks */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall the GFX memory clients while the reset is applied */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert the reset bits */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                /* read back to post the write before the delay */
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                /* deassert the reset bits */
                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same assert/deassert dance on the SRBM side */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the GMCON stall */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5051
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after soft reset
 *
 * Re-deactivates any stale compute HQDs, resumes the KIQ/KCQ and GFX
 * command processors according to the cached reset masks, then restarts
 * the RLC.  Always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        /* NOTE(review): srbm_soft_reset is read but not used below */
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* make sure no HQD is left active before resuming queues */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
                gfx_v8_0_kcq_resume(adev);
        }

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        adev->gfx.rlc.funcs->start(adev);

        return 0;
}
5091
5092 /**
5093  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5094  *
5095  * @adev: amdgpu_device pointer
5096  *
5097  * Fetches a GPU clock counter snapshot.
5098  * Returns the 64 bit clock counter snapshot.
5099  */
5100 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5101 {
5102         uint64_t clock;
5103
5104         mutex_lock(&adev->gfx.gpu_clock_mutex);
5105         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5106         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5107                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5108         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5109         return clock;
5110 }
5111
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the GDS configuration for a VMID
 *
 * Writes the per-VMID GDS base/size, GWS and OA allocations via four
 * WRITE_DATA packets on @ring.  The register offsets come from the
 * amdgpu_gds_reg_offset[] table indexed by @vmid.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size in the upper field, base in the lower */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: encoded as a contiguous bitmask of oa_size bits from oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5150
5151 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5152 {
5153         WREG32(mmSQ_IND_INDEX,
5154                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5155                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5156                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5157                 (SQ_IND_INDEX__FORCE_READ_MASK));
5158         return RREG32(mmSQ_IND_DATA);
5159 }
5160
5161 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5162                            uint32_t wave, uint32_t thread,
5163                            uint32_t regno, uint32_t num, uint32_t *out)
5164 {
5165         WREG32(mmSQ_IND_INDEX,
5166                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5167                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5168                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5169                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5170                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5171                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5172         while (num--)
5173                 *(out++) = RREG32(mmSQ_IND_DATA);
5174 }
5175
5176 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5177 {
5178         /* type 0 wave data */
5179         dst[(*no_fields)++] = 0;
5180         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5181         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5182         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5183         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5184         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5185         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5186         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5187         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5188         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5189         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5190         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5191         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5192         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5193         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5194         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5195         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5196         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5197         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5198 }
5199
/* Read @size SGPRs of a wave starting at @start into @dst, using the
 * SQ indirect auto-increment interface (thread id 0).
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
                                     uint32_t wave, uint32_t start,
                                     uint32_t size, uint32_t *dst)
{
        wave_read_regs(
                adev, simd, wave, 0,
                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5208
5209
/* Callback table handed to the common amdgpu gfx layer: clock counter
 * snapshotting, SE/SH and ME/pipe/queue selection, and wave debug
 * register access.
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
        .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5217
/*
 * gfx_v8_0_early_init - early_init hook: set ring counts and callbacks
 *
 * Installs the gfx v8 function tables before any later init stage runs.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5232
5233 static int gfx_v8_0_late_init(void *handle)
5234 {
5235         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5236         int r;
5237
5238         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5239         if (r)
5240                 return r;
5241
5242         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5243         if (r)
5244                 return r;
5245
5246         /* requires IBs so do in late init after IB pool is initialized */
5247         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5248         if (r)
5249                 return r;
5250
5251         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5252         if (r) {
5253                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5254                 return r;
5255         }
5256
5257         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5258         if (r) {
5259                 DRM_ERROR(
5260                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5261                         r);
5262                 return r;
5263         }
5264
5265         return 0;
5266 }
5267
5268 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5269                                                        bool enable)
5270 {
5271         if (((adev->asic_type == CHIP_POLARIS11) ||
5272             (adev->asic_type == CHIP_POLARIS12) ||
5273             (adev->asic_type == CHIP_VEGAM)) &&
5274             adev->powerplay.pp_funcs->set_powergating_by_smu)
5275                 /* Send msg to SMU via Powerplay */
5276                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5277
5278         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5279 }
5280
/* Enable/disable dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
                                                        bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5286
/* Enable/disable quick power gating (Polaris11-class parts) via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5292
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
                                          bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5298
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

        /* Read any GFX register to wake up GFX. */
        if (!enable)
                RREG32(mmDB_RENDER_CONTROL);
}
5308
5309 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5310                                           bool enable)
5311 {
5312         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5313                 cz_enable_gfx_cg_power_gating(adev, true);
5314                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5315                         cz_enable_gfx_pipeline_power_gating(adev, true);
5316         } else {
5317                 cz_enable_gfx_cg_power_gating(adev, false);
5318                 cz_enable_gfx_pipeline_power_gating(adev, false);
5319         }
5320 }
5321
/*
 * gfx_v8_0_set_powergating_state - set_powergating_state hook
 *
 * Applies the per-ASIC power-gating configuration for @state, wrapped
 * in RLC safe mode when any of the affected PG features is supported.
 * No-op under SR-IOV.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_PG_STATE_GATE);

        /* the host controls power gating for virtual functions */
        if (amdgpu_sriov_vf(adev))
                return 0;

        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
                                AMD_PG_SUPPORT_RLC_SMU_HS |
                                AMD_PG_SUPPORT_CP |
                                AMD_PG_SUPPORT_GFX_DMG))
                amdgpu_gfx_rlc_enter_safe_mode(adev);
        switch (adev->asic_type) {
        case CHIP_CARRIZO:
        case CHIP_STONEY:

                /* SCK slow-down follows RLC SMU handshake support */
                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
                        cz_enable_sck_slow_down_on_power_up(adev, true);
                        cz_enable_sck_slow_down_on_power_down(adev, true);
                } else {
                        cz_enable_sck_slow_down_on_power_up(adev, false);
                        cz_enable_sck_slow_down_on_power_down(adev, false);
                }
                if (adev->pg_flags & AMD_PG_SUPPORT_CP)
                        cz_enable_cp_power_gating(adev, true);
                else
                        cz_enable_cp_power_gating(adev, false);

                cz_update_gfx_cg_power_gating(adev, enable);

                /* static (SMG) and dynamic (DMG) per-CU gating only
                 * while actually gating
                 */
                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_VEGAM:
                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
                else
                        gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

                if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
                        polaris11_enable_gfx_quick_mg_power_gating(adev, true);
                else
                        polaris11_enable_gfx_quick_mg_power_gating(adev, false);
                break;
        default:
                break;
        }
        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
                                AMD_PG_SUPPORT_RLC_SMU_HS |
                                AMD_PG_SUPPORT_CP |
                                AMD_PG_SUPPORT_GFX_DMG))
                amdgpu_gfx_rlc_exit_safe_mode(adev);
        return 0;
}
5392
/*
 * gfx_v8_0_get_clockgating_state - report active CG features in *flags
 *
 * Decodes the clock-gating related registers into AMD_CG_SUPPORT_GFX_*
 * bits OR'ed into *flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        /* NOTE(review): under SR-IOV *flags is cleared but the register
         * reads below still run - confirm whether an early return was
         * intended here.
         */
        if (amdgpu_sriov_vf(adev))
                *flags = 0;

        /* AMD_CG_SUPPORT_GFX_MGCG */
        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_MGCG;

        /* AMD_CG_SUPPORT_GFX_CGLG */
        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGCG;

        /* AMD_CG_SUPPORT_GFX_CGLS */
        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGLS;

        /* AMD_CG_SUPPORT_GFX_CGTS */
        data = RREG32(mmCGTS_SM_CTRL_REG);
        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS;

        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

        /* AMD_CG_SUPPORT_GFX_RLC_LS */
        data = RREG32(mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

        /* AMD_CG_SUPPORT_GFX_CP_LS */
        data = RREG32(mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5434
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * Broadcasts to every SE/SH, selects all CU and non-CU masters, then
 * programs RLC_SERDES_WR_CTRL with @cmd/@reg_addr after clearing the
 * command/select fields (Stoney keeps its BPM_DATA/REG_ADDR fields).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast to all shader engines / arrays / CUs */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        /* clear stale command/select bits before composing the new command */
        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5475
5476 #define MSG_ENTER_RLC_SAFE_MODE     1
5477 #define MSG_EXIT_RLC_SAFE_MODE      0
5478 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5479 #define RLC_GPR_REG2__REQ__SHIFT 0
5480 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5481 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5482
5483 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5484 {
5485         uint32_t rlc_setting;
5486
5487         rlc_setting = RREG32(mmRLC_CNTL);
5488         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5489                 return false;
5490
5491         return true;
5492 }
5493
/*
 * gfx_v8_0_set_safe_mode - request RLC safe mode and wait for entry
 * @adev: amdgpu device pointer
 *
 * Writes a safe-mode request (CMD + MESSAGE=1) to RLC_SAFE_MODE, then
 * polls until the GFX clock/power status bits report active and the RLC
 * has acknowledged the command (CMD bit cleared).  Times out silently
 * after adev->usec_timeout iterations.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): the request is seeded from RLC_CNTL but written to
	 * RLC_SAFE_MODE — this matches the unset path below; confirm against
	 * the RLC programming guide. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* wait for the RLC to acknowledge the command */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5520
/*
 * gfx_v8_0_unset_safe_mode - request RLC safe-mode exit and wait for ack
 * @adev: amdgpu device pointer
 *
 * Writes an exit request (CMD set, MESSAGE=0) to RLC_SAFE_MODE and polls
 * until the RLC clears the CMD bit, or adev->usec_timeout expires.
 */
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	/* MESSAGE=0 means "exit safe mode" */
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5537
/* RLC ops table for GFX v8 ASICs; plugged into adev->gfx.rlc.funcs and
 * consumed by the common amdgpu_gfx_rlc_* helpers.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5551
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Programs RLC/CP memory light sleep, the RLC MGCG override bits and the
 * CGTS (tree shade) controls, honouring the AMD_CG_SUPPORT_GFX_* flags in
 * adev->cg_flags.  The numbered comments follow the required hardware
 * sequence; the whole thing runs under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5655
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Drives the RLC CGCG/CGLS override bits and RLC_CGCG_CGLS_CTRL enables
 * via the serdes command interface, gated on AMD_CG_SUPPORT_GFX_CGCG /
 * AMD_CG_SUPPORT_GFX_CGLS.  Runs under RLC safe mode; GUI idle
 * interrupts are disabled around the disable sequence and re-enabled
 * afterwards for power gating.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5748 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5749                                             bool enable)
5750 {
5751         if (enable) {
5752                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5753                  * ===  MGCG + MGLS + TS(CG/LS) ===
5754                  */
5755                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5756                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5757         } else {
5758                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5759                  * ===  CGCG + CGLS ===
5760                  */
5761                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5762                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5763         }
5764         return 0;
5765 }
5766
5767 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5768                                           enum amd_clockgating_state state)
5769 {
5770         uint32_t msg_id, pp_state = 0;
5771         uint32_t pp_support_state = 0;
5772
5773         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5774                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5775                         pp_support_state = PP_STATE_SUPPORT_LS;
5776                         pp_state = PP_STATE_LS;
5777                 }
5778                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5779                         pp_support_state |= PP_STATE_SUPPORT_CG;
5780                         pp_state |= PP_STATE_CG;
5781                 }
5782                 if (state == AMD_CG_STATE_UNGATE)
5783                         pp_state = 0;
5784
5785                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5786                                 PP_BLOCK_GFX_CG,
5787                                 pp_support_state,
5788                                 pp_state);
5789                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5790                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5791         }
5792
5793         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5794                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5795                         pp_support_state = PP_STATE_SUPPORT_LS;
5796                         pp_state = PP_STATE_LS;
5797                 }
5798
5799                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5800                         pp_support_state |= PP_STATE_SUPPORT_CG;
5801                         pp_state |= PP_STATE_CG;
5802                 }
5803
5804                 if (state == AMD_CG_STATE_UNGATE)
5805                         pp_state = 0;
5806
5807                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5808                                 PP_BLOCK_GFX_MG,
5809                                 pp_support_state,
5810                                 pp_state);
5811                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5812                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5813         }
5814
5815         return 0;
5816 }
5817
5818 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5819                                           enum amd_clockgating_state state)
5820 {
5821
5822         uint32_t msg_id, pp_state = 0;
5823         uint32_t pp_support_state = 0;
5824
5825         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5826                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5827                         pp_support_state = PP_STATE_SUPPORT_LS;
5828                         pp_state = PP_STATE_LS;
5829                 }
5830                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5831                         pp_support_state |= PP_STATE_SUPPORT_CG;
5832                         pp_state |= PP_STATE_CG;
5833                 }
5834                 if (state == AMD_CG_STATE_UNGATE)
5835                         pp_state = 0;
5836
5837                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5838                                 PP_BLOCK_GFX_CG,
5839                                 pp_support_state,
5840                                 pp_state);
5841                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5842                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5843         }
5844
5845         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5846                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5847                         pp_support_state = PP_STATE_SUPPORT_LS;
5848                         pp_state = PP_STATE_LS;
5849                 }
5850                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5851                         pp_support_state |= PP_STATE_SUPPORT_CG;
5852                         pp_state |= PP_STATE_CG;
5853                 }
5854                 if (state == AMD_CG_STATE_UNGATE)
5855                         pp_state = 0;
5856
5857                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5858                                 PP_BLOCK_GFX_3D,
5859                                 pp_support_state,
5860                                 pp_state);
5861                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5862                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5863         }
5864
5865         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5866                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5867                         pp_support_state = PP_STATE_SUPPORT_LS;
5868                         pp_state = PP_STATE_LS;
5869                 }
5870
5871                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5872                         pp_support_state |= PP_STATE_SUPPORT_CG;
5873                         pp_state |= PP_STATE_CG;
5874                 }
5875
5876                 if (state == AMD_CG_STATE_UNGATE)
5877                         pp_state = 0;
5878
5879                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880                                 PP_BLOCK_GFX_MG,
5881                                 pp_support_state,
5882                                 pp_state);
5883                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5884                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5885         }
5886
5887         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5888                 pp_support_state = PP_STATE_SUPPORT_LS;
5889
5890                 if (state == AMD_CG_STATE_UNGATE)
5891                         pp_state = 0;
5892                 else
5893                         pp_state = PP_STATE_LS;
5894
5895                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896                                 PP_BLOCK_GFX_RLC,
5897                                 pp_support_state,
5898                                 pp_state);
5899                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5900                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5901         }
5902
5903         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5904                 pp_support_state = PP_STATE_SUPPORT_LS;
5905
5906                 if (state == AMD_CG_STATE_UNGATE)
5907                         pp_state = 0;
5908                 else
5909                         pp_state = PP_STATE_LS;
5910                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911                         PP_BLOCK_GFX_CP,
5912                         pp_support_state,
5913                         pp_state);
5914                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5915                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5916         }
5917
5918         return 0;
5919 }
5920
5921 static int gfx_v8_0_set_clockgating_state(void *handle,
5922                                           enum amd_clockgating_state state)
5923 {
5924         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5925
5926         if (amdgpu_sriov_vf(adev))
5927                 return 0;
5928
5929         switch (adev->asic_type) {
5930         case CHIP_FIJI:
5931         case CHIP_CARRIZO:
5932         case CHIP_STONEY:
5933                 gfx_v8_0_update_gfx_clock_gating(adev,
5934                                                  state == AMD_CG_STATE_GATE);
5935                 break;
5936         case CHIP_TONGA:
5937                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5938                 break;
5939         case CHIP_POLARIS10:
5940         case CHIP_POLARIS11:
5941         case CHIP_POLARIS12:
5942         case CHIP_VEGAM:
5943                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5944                 break;
5945         default:
5946                 break;
5947         }
5948         return 0;
5949 }
5950
5951 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5952 {
5953         return ring->adev->wb.wb[ring->rptr_offs];
5954 }
5955
5956 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5957 {
5958         struct amdgpu_device *adev = ring->adev;
5959
5960         if (ring->use_doorbell)
5961                 /* XXX check if swapping is necessary on BE */
5962                 return ring->adev->wb.wb[ring->wptr_offs];
5963         else
5964                 return RREG32(mmCP_RB0_WPTR);
5965 }
5966
5967 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5968 {
5969         struct amdgpu_device *adev = ring->adev;
5970
5971         if (ring->use_doorbell) {
5972                 /* XXX check if swapping is necessary on BE */
5973                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
5974                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5975         } else {
5976                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5977                 (void)RREG32(mmCP_RB0_WPTR);
5978         }
5979 }
5980
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: ring to emit to
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and then
 * polls GPU_HDP_FLUSH_DONE until the per-client bit in ref_and_mask is
 * set, ensuring host-visible writes have landed before further packets.
 * Unknown compute MEs emit nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* compute/KIQ: select the CP bit matching this ME/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0; /* me */
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6013
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the vertex geometry engine
 * @ring: ring to emit to
 *
 * Emits a VS partial flush followed by a VGT flush event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6024
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 * @ring: ring to emit to
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: indirect buffer to schedule
 * @ctx_switch: unused here
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet
 * carrying the IB's GPU address, size and VMID.  Under SR-IOV, preemptible
 * DE IBs additionally get preemption metadata emitted first.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6056
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: ring to emit to
 * @job: job the IB belongs to (supplies the VMID), may be NULL
 * @ib: indirect buffer to schedule
 * @ctx_switch: unused here
 *
 * Emits an INDIRECT_BUFFER packet with the IB's GPU address, size and VMID.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  bool ctx_switch)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6074
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: ring to emit to
 * @addr: GPU address the sequence number is written to
 * @seq: sequence number value
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Emits an EVENT_WRITE_EOP packet that flushes TC/TCL1 caches and writes
 * @seq to @addr once prior work has completed.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6095
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for prior work on this ring
 * @ring: ring to emit to
 *
 * Emits a WAIT_REG_MEM packet polling the ring's fence memory until the
 * last synced sequence number appears, i.e. until previously submitted
 * work has signalled.  Gfx rings wait on the PFP, compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask: all bits */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6112
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM TLB flush on the ring
 * @ring: ring to emit to
 * @vmid: VM ID whose translations are invalidated
 * @pd_addr: page directory base address for @vmid
 *
 * Delegates the actual flush to the GMC helper, then waits for
 * VM_INVALIDATE_REQUEST to read back as zero.  On gfx rings a
 * PFP_SYNC_ME is appended so the PFP does not prefetch stale data.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6138
6139 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6140 {
6141         return ring->adev->wb.wb[ring->wptr_offs];
6142 }
6143
6144 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6145 {
6146         struct amdgpu_device *adev = ring->adev;
6147
6148         /* XXX check if swapping is necessary on BE */
6149         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6150         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6151 }
6152
6153 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6154                                            bool acquire)
6155 {
6156         struct amdgpu_device *adev = ring->adev;
6157         int pipe_num, tmp, reg;
6158         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6159
6160         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6161
6162         /* first me only has 2 entries, GFX and HP3D */
6163         if (ring->me > 0)
6164                 pipe_num -= 2;
6165
6166         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6167         tmp = RREG32(reg);
6168         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6169         WREG32(reg, tmp);
6170 }
6171
/*
 * gfx_v8_0_pipe_reserve_resources - track per-pipe resource reservations
 * @adev: amdgpu device pointer
 * @ring: ring acquiring or releasing its pipe reservation
 * @acquire: true to reserve the pipe, false to release it
 *
 * Maintains pipe_reserve_bitmap under pipe_reserve_mutex.  When no pipe
 * holds a reservation, every ring gets its full wave-launch quota back;
 * otherwise only pipes with a reservation keep the full quota and all
 * others are throttled via gfx_v8_0_ring_set_pipe_percent().
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6221
6222 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6223                                       struct amdgpu_ring *ring,
6224                                       bool acquire)
6225 {
6226         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6227         uint32_t queue_priority = acquire ? 0xf : 0x0;
6228
6229         mutex_lock(&adev->srbm_mutex);
6230         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6231
6232         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6233         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6234
6235         vi_srbm_select(adev, 0, 0, 0, 0);
6236         mutex_unlock(&adev->srbm_mutex);
6237 }
6238 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6239                                                enum drm_sched_priority priority)
6240 {
6241         struct amdgpu_device *adev = ring->adev;
6242         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6243
6244         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6245                 return;
6246
6247         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6248         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6249 }
6250
/*
 * Emit a fence on a compute ring via a RELEASE_MEM packet: flush/writeback
 * the TC/TCL1 caches, then write @seq to @addr and optionally raise an
 * interrupt, per @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* DATA_SEL 2 = 64-bit seq, 1 = 32-bit; INT_SEL 2 = interrupt requested */
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        /* low bits masked off: destination must be dword aligned */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6271
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to @addr,
 * then (if requested) a second WRITE_DATA to CPC_INT_STATUS to trigger
 * the interrupt.  64-bit sequence values are not supported here.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6296
/* Emit a SWITCH_BUFFER packet (zero payload) on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6302
6303 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6304 {
6305         uint32_t dw2 = 0;
6306
6307         if (amdgpu_sriov_vf(ring->adev))
6308                 gfx_v8_0_ring_emit_ce_meta(ring);
6309
6310         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6311         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6312                 gfx_v8_0_ring_emit_vgt_flush(ring);
6313                 /* set load_global_config & load_global_uconfig */
6314                 dw2 |= 0x8001;
6315                 /* set load_cs_sh_regs */
6316                 dw2 |= 0x01000000;
6317                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6318                 dw2 |= 0x10002;
6319
6320                 /* set load_ce_ram if preamble presented */
6321                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6322                         dw2 |= 0x10000000;
6323         } else {
6324                 /* still load_ce_ram if this is the first time preamble presented
6325                  * although there is no context switch happens.
6326                  */
6327                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6328                         dw2 |= 0x10000000;
6329         }
6330
6331         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6332         amdgpu_ring_write(ring, dw2);
6333         amdgpu_ring_write(ring, 0);
6334 }
6335
/*
 * Emit a COND_EXEC packet with a placeholder skip-count and return the
 * ring offset of that placeholder so it can be patched once the size of
 * the conditional section is known (see gfx_v8_0_ring_emit_patch_cond_exec).
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        /* remember where the placeholder lands before writing it */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6348
/*
 * Patch the COND_EXEC placeholder emitted at @offset with the number of
 * dwords between it and the current write pointer, accounting for the
 * ring buffer wrapping around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        /* the placeholder written by gfx_v8_0_ring_emit_init_cond_exec */
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr has wrapped past the end of the ring buffer */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6362
/*
 * Emit a COPY_DATA packet that reads register @reg and writes its value
 * into the writeback slot reserved at adev->virt.reg_val_offs (used for
 * register reads through the KIQ under SR-IOV).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        /* destination: GPU address of the reg_val writeback slot */
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6378
6379 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6380                                   uint32_t val)
6381 {
6382         uint32_t cmd;
6383
6384         switch (ring->funcs->type) {
6385         case AMDGPU_RING_TYPE_GFX:
6386                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6387                 break;
6388         case AMDGPU_RING_TYPE_KIQ:
6389                 cmd = 1 << 16; /* no inc addr */
6390                 break;
6391         default:
6392                 cmd = WR_CONFIRM;
6393                 break;
6394         }
6395
6396         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6397         amdgpu_ring_write(ring, cmd);
6398         amdgpu_ring_write(ring, reg);
6399         amdgpu_ring_write(ring, 0);
6400         amdgpu_ring_write(ring, val);
6401 }
6402
6403 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6404 {
6405         struct amdgpu_device *adev = ring->adev;
6406         uint32_t value = 0;
6407
6408         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6409         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6410         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6411         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6412         WREG32(mmSQ_CMD, value);
6413 }
6414
/* Enable/disable the end-of-pipe timestamp interrupt for the gfx ring. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6421
6422 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6423                                                      int me, int pipe,
6424                                                      enum amdgpu_interrupt_state state)
6425 {
6426         u32 mec_int_cntl, mec_int_cntl_reg;
6427
6428         /*
6429          * amdgpu controls only the first MEC. That's why this function only
6430          * handles the setting of interrupts for this specific MEC. All other
6431          * pipes' interrupts are set by amdkfd.
6432          */
6433
6434         if (me == 1) {
6435                 switch (pipe) {
6436                 case 0:
6437                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6438                         break;
6439                 case 1:
6440                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6441                         break;
6442                 case 2:
6443                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6444                         break;
6445                 case 3:
6446                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6447                         break;
6448                 default:
6449                         DRM_DEBUG("invalid pipe %d\n", pipe);
6450                         return;
6451                 }
6452         } else {
6453                 DRM_DEBUG("invalid me %d\n", me);
6454                 return;
6455         }
6456
6457         switch (state) {
6458         case AMDGPU_IRQ_STATE_DISABLE:
6459                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6460                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6461                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6462                 break;
6463         case AMDGPU_IRQ_STATE_ENABLE:
6464                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6465                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6466                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6467                 break;
6468         default:
6469                 break;
6470         }
6471 }
6472
6473 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6474                                              struct amdgpu_irq_src *source,
6475                                              unsigned type,
6476                                              enum amdgpu_interrupt_state state)
6477 {
6478         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6479                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6480
6481         return 0;
6482 }
6483
6484 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6485                                               struct amdgpu_irq_src *source,
6486                                               unsigned type,
6487                                               enum amdgpu_interrupt_state state)
6488 {
6489         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6490                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6491
6492         return 0;
6493 }
6494
/*
 * Dispatch an EOP interrupt state change to the right ring: the gfx ring
 * or one of the MEC1/MEC2 compute pipes.  Unknown types are ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                break;
        }
        return 0;
}
6533
6534 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6535                                          struct amdgpu_irq_src *source,
6536                                          unsigned int type,
6537                                          enum amdgpu_interrupt_state state)
6538 {
6539         int enable_flag;
6540
6541         switch (state) {
6542         case AMDGPU_IRQ_STATE_DISABLE:
6543                 enable_flag = 0;
6544                 break;
6545
6546         case AMDGPU_IRQ_STATE_ENABLE:
6547                 enable_flag = 1;
6548                 break;
6549
6550         default:
6551                 return -EINVAL;
6552         }
6553
6554         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6555         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6556         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6557         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6558         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6559         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6560                      enable_flag);
6561         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6562                      enable_flag);
6563         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6564                      enable_flag);
6565         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6566                      enable_flag);
6567         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6568                      enable_flag);
6569         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6570                      enable_flag);
6571         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6572                      enable_flag);
6573         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6574                      enable_flag);
6575
6576         return 0;
6577 }
6578
6579 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6580                                      struct amdgpu_irq_src *source,
6581                                      unsigned int type,
6582                                      enum amdgpu_interrupt_state state)
6583 {
6584         int enable_flag;
6585
6586         switch (state) {
6587         case AMDGPU_IRQ_STATE_DISABLE:
6588                 enable_flag = 1;
6589                 break;
6590
6591         case AMDGPU_IRQ_STATE_ENABLE:
6592                 enable_flag = 0;
6593                 break;
6594
6595         default:
6596                 return -EINVAL;
6597         }
6598
6599         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6600                      enable_flag);
6601
6602         return 0;
6603 }
6604
6605 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6606                             struct amdgpu_irq_src *source,
6607                             struct amdgpu_iv_entry *entry)
6608 {
6609         int i;
6610         u8 me_id, pipe_id, queue_id;
6611         struct amdgpu_ring *ring;
6612
6613         DRM_DEBUG("IH: CP EOP\n");
6614         me_id = (entry->ring_id & 0x0c) >> 2;
6615         pipe_id = (entry->ring_id & 0x03) >> 0;
6616         queue_id = (entry->ring_id & 0x70) >> 4;
6617
6618         switch (me_id) {
6619         case 0:
6620                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6621                 break;
6622         case 1:
6623         case 2:
6624                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6625                         ring = &adev->gfx.compute_ring[i];
6626                         /* Per-queue interrupt is supported for MEC starting from VI.
6627                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6628                           */
6629                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6630                                 amdgpu_fence_process(ring);
6631                 }
6632                 break;
6633         }
6634         return 0;
6635 }
6636
6637 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6638                            struct amdgpu_iv_entry *entry)
6639 {
6640         u8 me_id, pipe_id, queue_id;
6641         struct amdgpu_ring *ring;
6642         int i;
6643
6644         me_id = (entry->ring_id & 0x0c) >> 2;
6645         pipe_id = (entry->ring_id & 0x03) >> 0;
6646         queue_id = (entry->ring_id & 0x70) >> 4;
6647
6648         switch (me_id) {
6649         case 0:
6650                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6651                 break;
6652         case 1:
6653         case 2:
6654                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6655                         ring = &adev->gfx.compute_ring[i];
6656                         if (ring->me == me_id && ring->pipe == pipe_id &&
6657                             ring->queue == queue_id)
6658                                 drm_sched_fault(&ring->sched);
6659                 }
6660                 break;
6661         }
6662 }
6663
/* Privileged-register access fault interrupt: log it and fault the ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6672
/* Privileged-instruction fault interrupt: log it and fault the ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v8_0_fault(adev, entry);
        return 0;
}
6681
/*
 * CP EDC/ECC error interrupt handler.  Log-only: no recovery action is
 * taken here.
 *
 * Fix: the log message was missing its trailing newline, which runs the
 * next kernel log line onto the same line (every other DRM_ERROR in this
 * file ends with "\n").
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6689
6690 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6691 {
6692         u32 enc, se_id, sh_id, cu_id;
6693         char type[20];
6694         int sq_edc_source = -1;
6695
6696         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6697         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6698
6699         switch (enc) {
6700                 case 0:
6701                         DRM_INFO("SQ general purpose intr detected:"
6702                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6703                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6704                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6705                                         "wlt %d, thread_trace %d.\n",
6706                                         se_id,
6707                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6708                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6709                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6710                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6711                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6712                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6713                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6714                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6715                                         );
6716                         break;
6717                 case 1:
6718                 case 2:
6719
6720                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6721                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6722
6723                         /*
6724                          * This function can be called either directly from ISR
6725                          * or from BH in which case we can access SQ_EDC_INFO
6726                          * instance
6727                          */
6728                         if (in_task()) {
6729                                 mutex_lock(&adev->grbm_idx_mutex);
6730                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6731
6732                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6733
6734                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6735                                 mutex_unlock(&adev->grbm_idx_mutex);
6736                         }
6737
6738                         if (enc == 1)
6739                                 sprintf(type, "instruction intr");
6740                         else
6741                                 sprintf(type, "EDC/ECC error");
6742
6743                         DRM_INFO(
6744                                 "SQ %s detected: "
6745                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6746                                         "trap %s, sq_ed_info.source %s.\n",
6747                                         type, se_id, sh_id, cu_id,
6748                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6749                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6750                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6751                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6752                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6753                                 );
6754                         break;
6755                 default:
6756                         DRM_ERROR("SQ invalid encoding type\n.");
6757         }
6758 }
6759
/*
 * Bottom-half worker for SQ interrupts: recover both the device and the
 * sq_work container from the work_struct (sq_work is embedded in
 * adev->gfx), then parse the stashed ih_data in task context.
 */
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{

        struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
        struct sq_work *sq_work = container_of(work, struct sq_work, work);

        gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}
6768
6769 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6770                            struct amdgpu_irq_src *source,
6771                            struct amdgpu_iv_entry *entry)
6772 {
6773         unsigned ih_data = entry->src_data[0];
6774
6775         /*
6776          * Try to submit work so SQ_EDC_INFO can be accessed from
6777          * BH. If previous work submission hasn't finished yet
6778          * just print whatever info is possible directly from the ISR.
6779          */
6780         if (work_pending(&adev->gfx.sq_work.work)) {
6781                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6782         } else {
6783                 adev->gfx.sq_work.ih_data = ih_data;
6784                 schedule_work(&adev->gfx.sq_work.work);
6785         }
6786
6787         return 0;
6788 }
6789
/* IP-block level callbacks (init/fini, reset, clock/power gating) for GFX8. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6810
/* Ring callbacks for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6855
/* Ring callbacks for the MEC compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .set_priority = gfx_v8_0_ring_set_priority_compute,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6885
/* Ring callbacks for the KIQ (kernel interface queue) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6909
6910 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6911 {
6912         int i;
6913
6914         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6915
6916         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6917                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6918
6919         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6920                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6921 }
6922
/* End-of-pipe interrupt source (CP ring completion) */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register fault interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* CP ECC error interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ (shader sequencer) interrupt source */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
6947
/*
 * gfx_v8_0_set_irq_funcs - register the gfx interrupt sources with the
 * amdgpu IRQ layer.  num_types is the number of distinct sub-sources a
 * client can enable/disable individually; the EOP source has one type
 * per CP ring (AMDGPU_CP_IRQ_LAST), the fault/error sources just one.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
6965
/*
 * gfx_v8_0_set_rlc_funcs - install the RLC (RunList Controller) callbacks.
 * NOTE(review): the table is named after Iceland but is presumably shared
 * by all ASICs handled by this file — confirm iceland_rlc_funcs (defined
 * earlier in this file) is truly variant-independent.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6970
6971 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6972 {
6973         /* init asci gds info */
6974         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6975         adev->gds.gws.total_size = 64;
6976         adev->gds.oa.total_size = 16;
6977
6978         if (adev->gds.mem.total_size == 64 * 1024) {
6979                 adev->gds.mem.gfx_partition_size = 4096;
6980                 adev->gds.mem.cs_partition_size = 4096;
6981
6982                 adev->gds.gws.gfx_partition_size = 4;
6983                 adev->gds.gws.cs_partition_size = 4;
6984
6985                 adev->gds.oa.gfx_partition_size = 4;
6986                 adev->gds.oa.cs_partition_size = 1;
6987         } else {
6988                 adev->gds.mem.gfx_partition_size = 1024;
6989                 adev->gds.mem.cs_partition_size = 1024;
6990
6991                 adev->gds.gws.gfx_partition_size = 16;
6992                 adev->gds.gws.cs_partition_size = 16;
6993
6994                 adev->gds.oa.gfx_partition_size = 4;
6995                 adev->gds.oa.cs_partition_size = 4;
6996         }
6997 }
6998
6999 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7000                                                  u32 bitmap)
7001 {
7002         u32 data;
7003
7004         if (!bitmap)
7005                 return;
7006
7007         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7008         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7009
7010         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7011 }
7012
7013 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7014 {
7015         u32 data, mask;
7016
7017         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7018                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7019
7020         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7021
7022         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7023 }
7024
/*
 * gfx_v8_0_get_cu_info - fill adev->gfx.cu_info with the per-SE/per-SH
 * active CU bitmaps and the "always on" (AO) CU selection.
 *
 * Walks every shader-engine / shader-array pair, applies the user CU
 * disable mask and reads back the resulting active-CU bitmap.  Within
 * each SH the first @ao_cu_num active CUs are marked always-on.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* one user mask per (SE, SH), max 4x2 */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on set at 2 CUs per SH */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* serialize against other users of the GRBM index register */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point subsequent register accesses at SE i / SH j */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4 SEs x 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH, 16 per SE, so only
			 * the first 2 SEs x 2 SHs fit in the 32-bit mask */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast so later register accesses hit all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7080
/* IP block descriptors: gfx 8.0 and 8.1 share the same function table */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7098
/*
 * gfx_v8_0_ring_emit_ce_meta - write a zero-initialized CE metadata
 * payload into the CSA (clear state area) via a PM4 WRITE_DATA packet.
 *
 * The payload layout — and therefore the packet length — depends on
 * whether the host supports chained IBs.  cnt_ce is the WRITE_DATA
 * count field: payload dwords plus 2 (per PM4 encoding the count covers
 * the 3 control/address dwords minus one — hence "+ 4 - 2"), so the
 * trailing write_multiple emits exactly cnt_ce - 2 payload dwords.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* engine_sel 2 targets the CE; dst_sel 8 writes through the GPU VM */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7127
/*
 * gfx_v8_0_ring_emit_de_meta - write a DE metadata payload into the CSA
 * via a PM4 WRITE_DATA packet, mirroring gfx_v8_0_ring_emit_ce_meta()
 * but targeting the DE (engine_sel 1).
 *
 * The payload is zeroed except for the GDS backup address, which points
 * 4 KiB past the CSA base.  cnt_de follows the same "+ 4 - 2" PM4 count
 * arithmetic as the CE variant, and write_multiple emits cnt_de - 2
 * payload dwords.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;	/* GDS backup lives 4 KiB into the CSA */
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
This page took 0.486841 seconds and 4 git commands to generate.