]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
Merge tag 'vmwgfx-next-4.19-3' of git://people.freedesktop.org/~thomash/linux into...
[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Number of GFX (graphics) rings exposed by the GFX8 block. */
#define GFX8_NUM_GFX_RINGS     1
/* Per-MEC pipe HPD (hardware queue descriptor) EOP buffer size, in bytes. */
#define GFX8_MEC_HPD_SIZE 2048

/* Golden (recommended power-on) GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers that shift a raw field value into its bit position within the
 * GB_TILE_MODE0 (tile-mode) and GB_MACROTILE_MODE0 (macrotile-mode)
 * registers; the shift constants come from gca/gfx_8_0_sh_mask.h.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/*
 * Locally defined RLC_CGTT_MGCG_OVERRIDE field masks: per-block
 * clock-gating override bits (one bit per hardware block).
 * NOTE(review): defined here rather than taken from a register header —
 * confirm they match the RLC firmware's expectations.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) a BPM register over the serdes link. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* Number of BPM registers / end marker */
};

/*
 * Number of entries in the RLC "direct register list" format.
 * NOTE(review): value is dictated by the RLC firmware ABI — confirm
 * against the firmware headers before changing.
 */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images required by the GFX8 IP block, one set per supported
 * ASIC (CE, PFP, ME, MEC, optional MEC2, RLC).  MODULE_FIRMWARE()
 * records each name in the module info so userspace tooling (e.g.
 * initramfs generators) can bundle the files ahead of time.
 * The Polaris "_2.bin" entries are alternate firmware images —
 * presumably newer revisions selected at load time; confirm against the
 * firmware-selection logic elsewhere in this file.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).
 * Each entry gives the {GDS base, GDS size, GWS, OA} register for that
 * VMID, so GDS/GWS/OA partitioning can be programmed per VM.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
190
/*
 * Golden register settings for Tonga (rev A11).
 * Flat triples of { register offset, AND mask, OR value }.
 * NOTE(review): assumed to be applied via the driver's golden-register
 * programming helper (read, mask, or-in value, write back) — confirm
 * with the init code later in this file.  Values and order are
 * hardware-validated; do not reorder or edit.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
210
/*
 * Common golden settings for all Tonga variants:
 * { register offset, AND mask, OR value } triples (raster config,
 * address config, and per-CU SPI resource reservations).
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
222
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clock gating) init
 * sequence: { register offset, AND mask, OR value } triples.
 * Starts by forcing all MGCG overrides on, broadcasts to all SEs/SHs
 * via GRBM_GFX_INDEX (0xe0000000), programs per-block CGTT clock
 * controls, then per-CU CGTS controls for CU0-CU7 (CU0/CU4 use the
 * TA_SQC variant register; the rest use TA), and finally the SM/CGCG
 * enable registers.  Order is hardware-mandated — do not reorder.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU clock-gating setup, CU0 through CU7 */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
301
/*
 * Golden register settings for VegaM (rev A11):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
322
/*
 * Common golden settings for all VegaM variants:
 * { register offset, AND mask, OR value } triples.
 */
static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
332
/*
 * Golden register settings for Polaris11 (rev A11):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
353
/*
 * Common golden settings for all Polaris11 variants:
 * { register offset, AND mask, OR value } triples.  GB_ADDR_CONFIG
 * matches POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
363
/*
 * Golden register settings for Polaris10 (rev A11):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
384
/*
 * Common golden settings for all Polaris10 variants:
 * { register offset, AND mask, OR value } triples.
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
396
/*
 * Common golden settings for all Fiji variants:
 * { register offset, AND mask, OR value } triples.  GRBM_GFX_INDEX is
 * re-broadcast (0xe0000000) before the trailing SPI_CONFIG_CNTL_1 write.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
410
/*
 * Golden register settings for Fiji (rev A10):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
425
/*
 * Fiji MGCG/CGCG clock-gating init sequence:
 * { register offset, AND mask, OR value } triples.  Same structure as
 * the Tonga sequence but without the per-CU CGTS programming.
 * Order is hardware-mandated — do not reorder.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
464
/*
 * Golden register settings for Iceland/Topaz (rev A11):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
484
/*
 * Common golden settings for all Iceland/Topaz variants:
 * { register offset, AND mask, OR value } triples.  GB_ADDR_CONFIG
 * matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
496
/*
 * Iceland/Topaz MGCG/CGCG clock-gating init sequence:
 * { register offset, AND mask, OR value } triples.  Same shape as the
 * Tonga sequence but with Iceland-specific CP/CPC/CPF/TCI clock-control
 * values, per-CU programming for CU0-CU5 only (CU0/CU4 use the TA_SQC
 * variant with 0x0f840f87), and no CP_MEM_SLP_CNTL entry at the end.
 * Order is hardware-mandated — do not reorder.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* per-CU clock-gating setup, CU0 through CU5 */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
564
/*
 * Golden register settings for Carrizo (rev A11):
 * { register offset, AND mask, OR value } triples.
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
580
/*
 * Common golden settings for all Carrizo variants:
 * { register offset, AND mask, OR value } triples.  GB_ADDR_CONFIG
 * matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
592
593 static const u32 cz_mgcg_cgcg_init[] =
594 {
595         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
596         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
597         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
598         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
599         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
604         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
613         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
617         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
619         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
620         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
621         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
622         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
623         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
625         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
627         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
628         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
629         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
630         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
631         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
667         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
668         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
669         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
670 };
671
/*
 * Stoney "golden" register settings as {register, mask, value} triples,
 * applied through amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
685
/*
 * Stoney common "golden" register values ({register, mask, value}
 * triples), applied via amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
697
/*
 * Stoney clockgating (MGCG/CGCG) init sequence ({register, mask, value}
 * triples), applied via amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
706
707
/*
 * Printable descriptions of the SQ EDC info "source" field, one entry
 * per SQ_EDC_INFO_SOURCE_* code (index order matters).
 */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
717
/* Forward declarations; all are static, so defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
726
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register
 * settings.
 *
 * Programs the clockgating-init, golden-settings and common register
 * sequences that match adev->asic_type, using
 * amdgpu_device_program_register_sequence().  The order of the
 * sequences per ASIC is preserved from the hardware bring-up tables.
 * Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /*
                 * Extra I2C transactions for three specific Polaris10
                 * boards, keyed on PCI subsystem vendor/device IDs.
                 * NOTE(review): the exact purpose of the I2C writes is
                 * not visible here — presumably a board-level fixup.
                 */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
823
824 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
825 {
826         adev->gfx.scratch.num_reg = 8;
827         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
828         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
829 }
830
831 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
832 {
833         struct amdgpu_device *adev = ring->adev;
834         uint32_t scratch;
835         uint32_t tmp = 0;
836         unsigned i;
837         int r;
838
839         r = amdgpu_gfx_scratch_get(adev, &scratch);
840         if (r) {
841                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
842                 return r;
843         }
844         WREG32(scratch, 0xCAFEDEAD);
845         r = amdgpu_ring_alloc(ring, 3);
846         if (r) {
847                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
848                           ring->idx, r);
849                 amdgpu_gfx_scratch_free(adev, scratch);
850                 return r;
851         }
852         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
853         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
854         amdgpu_ring_write(ring, 0xDEADBEEF);
855         amdgpu_ring_commit(ring);
856
857         for (i = 0; i < adev->usec_timeout; i++) {
858                 tmp = RREG32(scratch);
859                 if (tmp == 0xDEADBEEF)
860                         break;
861                 DRM_UDELAY(1);
862         }
863         if (i < adev->usec_timeout) {
864                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
865                          ring->idx, i);
866         } else {
867                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
868                           ring->idx, scratch, tmp);
869                 r = -EINVAL;
870         }
871         amdgpu_gfx_scratch_free(adev, scratch);
872         return r;
873 }
874
875 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
876 {
877         struct amdgpu_device *adev = ring->adev;
878         struct amdgpu_ib ib;
879         struct dma_fence *f = NULL;
880
881         unsigned int index;
882         uint64_t gpu_addr;
883         uint32_t tmp;
884         long r;
885
886         r = amdgpu_device_wb_get(adev, &index);
887         if (r) {
888                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
889                 return r;
890         }
891
892         gpu_addr = adev->wb.gpu_addr + (index * 4);
893         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
894         memset(&ib, 0, sizeof(ib));
895         r = amdgpu_ib_get(adev, NULL, 16, &ib);
896         if (r) {
897                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
898                 goto err1;
899         }
900         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
901         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
902         ib.ptr[2] = lower_32_bits(gpu_addr);
903         ib.ptr[3] = upper_32_bits(gpu_addr);
904         ib.ptr[4] = 0xDEADBEEF;
905         ib.length_dw = 5;
906
907         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
908         if (r)
909                 goto err2;
910
911         r = dma_fence_wait_timeout(f, false, timeout);
912         if (r == 0) {
913                 DRM_ERROR("amdgpu: IB test timed out.\n");
914                 r = -ETIMEDOUT;
915                 goto err2;
916         } else if (r < 0) {
917                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
918                 goto err2;
919         }
920
921         tmp = adev->wb.wb[index];
922         if (tmp == 0xDEADBEEF) {
923                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
924                 r = 0;
925         } else {
926                 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
927                 r = -EINVAL;
928         }
929
930 err2:
931         amdgpu_ib_free(adev, &ib, NULL);
932         dma_fence_put(f);
933 err1:
934         amdgpu_device_wb_free(adev, index);
935         return r;
936 }
937
938
939 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
940 {
941         release_firmware(adev->gfx.pfp_fw);
942         adev->gfx.pfp_fw = NULL;
943         release_firmware(adev->gfx.me_fw);
944         adev->gfx.me_fw = NULL;
945         release_firmware(adev->gfx.ce_fw);
946         adev->gfx.ce_fw = NULL;
947         release_firmware(adev->gfx.rlc_fw);
948         adev->gfx.rlc_fw = NULL;
949         release_firmware(adev->gfx.mec_fw);
950         adev->gfx.mec_fw = NULL;
951         if ((adev->asic_type != CHIP_STONEY) &&
952             (adev->asic_type != CHIP_TOPAZ))
953                 release_firmware(adev->gfx.mec2_fw);
954         adev->gfx.mec2_fw = NULL;
955
956         kfree(adev->gfx.rlc.register_list_format);
957 }
958
959 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
960 {
961         const char *chip_name;
962         char fw_name[30];
963         int err;
964         struct amdgpu_firmware_info *info = NULL;
965         const struct common_firmware_header *header = NULL;
966         const struct gfx_firmware_header_v1_0 *cp_hdr;
967         const struct rlc_firmware_header_v2_0 *rlc_hdr;
968         unsigned int *tmp = NULL, i;
969
970         DRM_DEBUG("\n");
971
972         switch (adev->asic_type) {
973         case CHIP_TOPAZ:
974                 chip_name = "topaz";
975                 break;
976         case CHIP_TONGA:
977                 chip_name = "tonga";
978                 break;
979         case CHIP_CARRIZO:
980                 chip_name = "carrizo";
981                 break;
982         case CHIP_FIJI:
983                 chip_name = "fiji";
984                 break;
985         case CHIP_STONEY:
986                 chip_name = "stoney";
987                 break;
988         case CHIP_POLARIS10:
989                 chip_name = "polaris10";
990                 break;
991         case CHIP_POLARIS11:
992                 chip_name = "polaris11";
993                 break;
994         case CHIP_POLARIS12:
995                 chip_name = "polaris12";
996                 break;
997         case CHIP_VEGAM:
998                 chip_name = "vegam";
999                 break;
1000         default:
1001                 BUG();
1002         }
1003
1004         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007                 if (err == -ENOENT) {
1008                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1010                 }
1011         } else {
1012                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1014         }
1015         if (err)
1016                 goto out;
1017         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1018         if (err)
1019                 goto out;
1020         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1023
1024         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027                 if (err == -ENOENT) {
1028                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1030                 }
1031         } else {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1034         }
1035         if (err)
1036                 goto out;
1037         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1038         if (err)
1039                 goto out;
1040         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042
1043         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1044
1045         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048                 if (err == -ENOENT) {
1049                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1051                 }
1052         } else {
1053                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1055         }
1056         if (err)
1057                 goto out;
1058         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1059         if (err)
1060                 goto out;
1061         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1064
1065         /*
1066          * Support for MCBP/Virtualization in combination with chained IBs is
1067          * formal released on feature version #46
1068          */
1069         if (adev->gfx.ce_feature_version >= 46 &&
1070             adev->gfx.pfp_feature_version >= 46) {
1071                 adev->virt.chained_ib_support = true;
1072                 DRM_INFO("Chained IB support enabled!\n");
1073         } else
1074                 adev->virt.chained_ib_support = false;
1075
1076         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1078         if (err)
1079                 goto out;
1080         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1084
1085         adev->gfx.rlc.save_and_restore_offset =
1086                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087         adev->gfx.rlc.clear_state_descriptor_offset =
1088                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089         adev->gfx.rlc.avail_scratch_ram_locations =
1090                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091         adev->gfx.rlc.reg_restore_list_size =
1092                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093         adev->gfx.rlc.reg_list_format_start =
1094                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1095         adev->gfx.rlc.reg_list_format_separate_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097         adev->gfx.rlc.starting_offsets_start =
1098                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1099         adev->gfx.rlc.reg_list_format_size_bytes =
1100                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101         adev->gfx.rlc.reg_list_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1103
1104         adev->gfx.rlc.register_list_format =
1105                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1107
1108         if (!adev->gfx.rlc.register_list_format) {
1109                 err = -ENOMEM;
1110                 goto out;
1111         }
1112
1113         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1116                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1117
1118         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1119
1120         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1123                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1124
1125         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128                 if (err == -ENOENT) {
1129                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1131                 }
1132         } else {
1133                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1135         }
1136         if (err)
1137                 goto out;
1138         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1139         if (err)
1140                 goto out;
1141         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1144
1145         if ((adev->asic_type != CHIP_STONEY) &&
1146             (adev->asic_type != CHIP_TOPAZ)) {
1147                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150                         if (err == -ENOENT) {
1151                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1153                         }
1154                 } else {
1155                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1157                 }
1158                 if (!err) {
1159                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1160                         if (err)
1161                                 goto out;
1162                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163                                 adev->gfx.mec2_fw->data;
1164                         adev->gfx.mec2_fw_version =
1165                                 le32_to_cpu(cp_hdr->header.ucode_version);
1166                         adev->gfx.mec2_feature_version =
1167                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1168                 } else {
1169                         err = 0;
1170                         adev->gfx.mec2_fw = NULL;
1171                 }
1172         }
1173
1174         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1175                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1176                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1177                 info->fw = adev->gfx.pfp_fw;
1178                 header = (const struct common_firmware_header *)info->fw->data;
1179                 adev->firmware.fw_size +=
1180                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1181
1182                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1183                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1184                 info->fw = adev->gfx.me_fw;
1185                 header = (const struct common_firmware_header *)info->fw->data;
1186                 adev->firmware.fw_size +=
1187                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1188
1189                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1190                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1191                 info->fw = adev->gfx.ce_fw;
1192                 header = (const struct common_firmware_header *)info->fw->data;
1193                 adev->firmware.fw_size +=
1194                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1195
1196                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1197                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1198                 info->fw = adev->gfx.rlc_fw;
1199                 header = (const struct common_firmware_header *)info->fw->data;
1200                 adev->firmware.fw_size +=
1201                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1202
1203                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1204                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1205                 info->fw = adev->gfx.mec_fw;
1206                 header = (const struct common_firmware_header *)info->fw->data;
1207                 adev->firmware.fw_size +=
1208                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1209
1210                 /* we need account JT in */
1211                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212                 adev->firmware.fw_size +=
1213                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1214
1215                 if (amdgpu_sriov_vf(adev)) {
1216                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1217                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1218                         info->fw = adev->gfx.mec_fw;
1219                         adev->firmware.fw_size +=
1220                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1221                 }
1222
1223                 if (adev->gfx.mec2_fw) {
1224                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1225                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1226                         info->fw = adev->gfx.mec2_fw;
1227                         header = (const struct common_firmware_header *)info->fw->data;
1228                         adev->firmware.fw_size +=
1229                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1230                 }
1231
1232         }
1233
1234 out:
1235         if (err) {
1236                 dev_err(adev->dev,
1237                         "gfx8: Failed to load firmware \"%s\"\n",
1238                         fw_name);
1239                 release_firmware(adev->gfx.pfp_fw);
1240                 adev->gfx.pfp_fw = NULL;
1241                 release_firmware(adev->gfx.me_fw);
1242                 adev->gfx.me_fw = NULL;
1243                 release_firmware(adev->gfx.ce_fw);
1244                 adev->gfx.ce_fw = NULL;
1245                 release_firmware(adev->gfx.rlc_fw);
1246                 adev->gfx.rlc_fw = NULL;
1247                 release_firmware(adev->gfx.mec_fw);
1248                 adev->gfx.mec_fw = NULL;
1249                 release_firmware(adev->gfx.mec2_fw);
1250                 adev->gfx.mec2_fw = NULL;
1251         }
1252         return err;
1253 }
1254
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB) contents
 * @adev: amdgpu device
 * @buffer: CPU pointer to the clear-state BO to fill (little-endian dwords)
 *
 * Emits the PM4 stream that loads the per-context "golden" register
 * state: preamble begin, a CONTEXT_CONTROL packet, every SECT_CONTEXT
 * extent from adev->gfx.rlc.cs_data, the board-specific
 * PA_SC_RASTER_CONFIG values, preamble end, and a final CLEAR_STATE
 * packet.  Silently returns if cs_data or @buffer is NULL.  The caller
 * must have sized the BO with gfx_v8_0_get_csb_size() so that @buffer
 * can hold every dword written here.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                /* one SET_CONTEXT_REG packet per extent:
                                 * header, register offset, then the values */
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                /* only context-register sections are supported;
                                 * bail without emitting the trailer */
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1301
1302 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1303 {
1304         const __le32 *fw_data;
1305         volatile u32 *dst_ptr;
1306         int me, i, max_me = 4;
1307         u32 bo_offset = 0;
1308         u32 table_offset, table_size;
1309
1310         if (adev->asic_type == CHIP_CARRIZO)
1311                 max_me = 5;
1312
1313         /* write the cp table buffer */
1314         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1315         for (me = 0; me < max_me; me++) {
1316                 if (me == 0) {
1317                         const struct gfx_firmware_header_v1_0 *hdr =
1318                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1319                         fw_data = (const __le32 *)
1320                                 (adev->gfx.ce_fw->data +
1321                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322                         table_offset = le32_to_cpu(hdr->jt_offset);
1323                         table_size = le32_to_cpu(hdr->jt_size);
1324                 } else if (me == 1) {
1325                         const struct gfx_firmware_header_v1_0 *hdr =
1326                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1327                         fw_data = (const __le32 *)
1328                                 (adev->gfx.pfp_fw->data +
1329                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330                         table_offset = le32_to_cpu(hdr->jt_offset);
1331                         table_size = le32_to_cpu(hdr->jt_size);
1332                 } else if (me == 2) {
1333                         const struct gfx_firmware_header_v1_0 *hdr =
1334                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1335                         fw_data = (const __le32 *)
1336                                 (adev->gfx.me_fw->data +
1337                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338                         table_offset = le32_to_cpu(hdr->jt_offset);
1339                         table_size = le32_to_cpu(hdr->jt_size);
1340                 } else if (me == 3) {
1341                         const struct gfx_firmware_header_v1_0 *hdr =
1342                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1343                         fw_data = (const __le32 *)
1344                                 (adev->gfx.mec_fw->data +
1345                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1346                         table_offset = le32_to_cpu(hdr->jt_offset);
1347                         table_size = le32_to_cpu(hdr->jt_size);
1348                 } else  if (me == 4) {
1349                         const struct gfx_firmware_header_v1_0 *hdr =
1350                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1351                         fw_data = (const __le32 *)
1352                                 (adev->gfx.mec2_fw->data +
1353                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1354                         table_offset = le32_to_cpu(hdr->jt_offset);
1355                         table_size = le32_to_cpu(hdr->jt_size);
1356                 }
1357
1358                 for (i = 0; i < table_size; i ++) {
1359                         dst_ptr[bo_offset + i] =
1360                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1361                 }
1362
1363                 bo_offset += table_size;
1364         }
1365 }
1366
/*
 * gfx_v8_0_rlc_fini - release the RLC buffer objects
 * @adev: amdgpu device
 *
 * Frees the clear-state BO and the CP jump-table BO allocated by
 * gfx_v8_0_rlc_init().
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1372
1373 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1374 {
1375         volatile u32 *dst_ptr;
1376         u32 dws;
1377         const struct cs_section_def *cs_data;
1378         int r;
1379
1380         adev->gfx.rlc.cs_data = vi_cs_data;
1381
1382         cs_data = adev->gfx.rlc.cs_data;
1383
1384         if (cs_data) {
1385                 /* clear state block */
1386                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1387
1388                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1389                                               AMDGPU_GEM_DOMAIN_VRAM,
1390                                               &adev->gfx.rlc.clear_state_obj,
1391                                               &adev->gfx.rlc.clear_state_gpu_addr,
1392                                               (void **)&adev->gfx.rlc.cs_ptr);
1393                 if (r) {
1394                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1395                         gfx_v8_0_rlc_fini(adev);
1396                         return r;
1397                 }
1398
1399                 /* set up the cs buffer */
1400                 dst_ptr = adev->gfx.rlc.cs_ptr;
1401                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1402                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1403                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1404         }
1405
1406         if ((adev->asic_type == CHIP_CARRIZO) ||
1407             (adev->asic_type == CHIP_STONEY)) {
1408                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1409                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1410                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1411                                               &adev->gfx.rlc.cp_table_obj,
1412                                               &adev->gfx.rlc.cp_table_gpu_addr,
1413                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1414                 if (r) {
1415                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1416                         return r;
1417                 }
1418
1419                 cz_init_cp_jump_table(adev);
1420
1421                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1422                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1423         }
1424
1425         return 0;
1426 }
1427
/*
 * gfx_v8_0_mec_fini - release the MEC HPD EOP buffer object
 * @adev: amdgpu device
 *
 * Frees the BO allocated by gfx_v8_0_mec_init().
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1432
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer
 * @adev: amdgpu device
 *
 * Claims the compute queues this driver will use, then allocates one
 * GFX8_MEC_HPD_SIZE slot per enabled compute ring in GTT, zeroes the
 * whole buffer and unmaps/unreserves the BO (the GPU address is kept
 * in adev->gfx.mec.hpd_eop_gpu_addr).
 *
 * Returns 0 on success or a negative error code from BO creation.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
        int r;
        u32 *hpd;
        size_t mec_hpd_size;

        /* start from a clean queue-ownership mask before acquiring queues */
        bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

        /* take ownership of the relevant compute queues */
        amdgpu_gfx_compute_queue_acquire(adev);

        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
                                      AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->gfx.mec.hpd_eop_obj,
                                      &adev->gfx.mec.hpd_eop_gpu_addr,
                                      (void **)&hpd);
        if (r) {
                dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
                return r;
        }

        memset(hpd, 0, mec_hpd_size);

        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

        return 0;
}
1463
/*
 * GFX8 compute-shader machine code used by the EDC GPR workaround
 * (gfx_v8_0_do_edc_gpr_workarounds) to initialize the VGPR file; it is
 * copied verbatim into the IB at vgpr_offset and dispatched from there.
 * NOTE(review): the individual opcode encodings are not decoded here —
 * treat this blob as opaque.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1500
/*
 * GFX8 compute-shader machine code used by the EDC GPR workaround to
 * initialize the SGPR file; copied into the IB at sgpr_offset and
 * dispatched twice (SGPR1/SGPR2 passes with complementary CU masks).
 * NOTE(review): opcode encodings are not decoded here — opaque blob.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1525
/*
 * Register/value pairs programmed via SET_SH_REG packets before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds(); entries are
 * consumed two at a time (register offset, value).
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1546
/*
 * Register/value pairs for the first SGPR-init dispatch; the
 * STATIC_THREAD_MGMT_SE0 mask (0x0f) targets the lower CUs, the
 * complementary sgpr2_init_regs table (0xf0) covers the rest.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1567
/*
 * Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the STATIC_THREAD_MGMT_SE0 mask (0xf0) selects
 * the complementary set of CUs.
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1588
/*
 * EDC SEC/DED error-counter registers; read back (value discarded) at
 * the end of gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1617
1618 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1619 {
1620         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1621         struct amdgpu_ib ib;
1622         struct dma_fence *f = NULL;
1623         int r, i;
1624         u32 tmp;
1625         unsigned total_size, vgpr_offset, sgpr_offset;
1626         u64 gpu_addr;
1627
1628         /* only supported on CZ */
1629         if (adev->asic_type != CHIP_CARRIZO)
1630                 return 0;
1631
1632         /* bail if the compute ring is not ready */
1633         if (!ring->ready)
1634                 return 0;
1635
1636         tmp = RREG32(mmGB_EDC_MODE);
1637         WREG32(mmGB_EDC_MODE, 0);
1638
1639         total_size =
1640                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1641         total_size +=
1642                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1643         total_size +=
1644                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645         total_size = ALIGN(total_size, 256);
1646         vgpr_offset = total_size;
1647         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1648         sgpr_offset = total_size;
1649         total_size += sizeof(sgpr_init_compute_shader);
1650
1651         /* allocate an indirect buffer to put the commands in */
1652         memset(&ib, 0, sizeof(ib));
1653         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1654         if (r) {
1655                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1656                 return r;
1657         }
1658
1659         /* load the compute shaders */
1660         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1661                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1662
1663         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1664                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1665
1666         /* init the ib length to 0 */
1667         ib.length_dw = 0;
1668
1669         /* VGPR */
1670         /* write the register state for the compute dispatch */
1671         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1672                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1673                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1674                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1675         }
1676         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1677         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1678         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1679         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1680         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1681         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1682
1683         /* write dispatch packet */
1684         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1685         ib.ptr[ib.length_dw++] = 8; /* x */
1686         ib.ptr[ib.length_dw++] = 1; /* y */
1687         ib.ptr[ib.length_dw++] = 1; /* z */
1688         ib.ptr[ib.length_dw++] =
1689                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1690
1691         /* write CS partial flush packet */
1692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1693         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1694
1695         /* SGPR1 */
1696         /* write the register state for the compute dispatch */
1697         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1698                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1699                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1700                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1701         }
1702         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1703         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1704         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1705         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1706         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1707         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1708
1709         /* write dispatch packet */
1710         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1711         ib.ptr[ib.length_dw++] = 8; /* x */
1712         ib.ptr[ib.length_dw++] = 1; /* y */
1713         ib.ptr[ib.length_dw++] = 1; /* z */
1714         ib.ptr[ib.length_dw++] =
1715                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1716
1717         /* write CS partial flush packet */
1718         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1719         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1720
1721         /* SGPR2 */
1722         /* write the register state for the compute dispatch */
1723         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1724                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1725                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1726                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1727         }
1728         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1729         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1730         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1731         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1732         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1733         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1734
1735         /* write dispatch packet */
1736         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1737         ib.ptr[ib.length_dw++] = 8; /* x */
1738         ib.ptr[ib.length_dw++] = 1; /* y */
1739         ib.ptr[ib.length_dw++] = 1; /* z */
1740         ib.ptr[ib.length_dw++] =
1741                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1742
1743         /* write CS partial flush packet */
1744         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1745         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1746
1747         /* shedule the ib on the ring */
1748         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1749         if (r) {
1750                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1751                 goto fail;
1752         }
1753
1754         /* wait for the GPU to finish processing the IB */
1755         r = dma_fence_wait(f, false);
1756         if (r) {
1757                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1758                 goto fail;
1759         }
1760
1761         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1762         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1763         WREG32(mmGB_EDC_MODE, tmp);
1764
1765         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1766         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1767         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1768
1769
1770         /* read back registers to clear the counters */
1771         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1772                 RREG32(sec_ded_counter_registers[i]);
1773
1774 fail:
1775         amdgpu_ib_free(adev, &ib, NULL);
1776         dma_fence_put(f);
1777
1778         return r;
1779 }
1780
1781 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1782 {
1783         u32 gb_addr_config;
1784         u32 mc_shared_chmap, mc_arb_ramcfg;
1785         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1786         u32 tmp;
1787         int ret;
1788
1789         switch (adev->asic_type) {
1790         case CHIP_TOPAZ:
1791                 adev->gfx.config.max_shader_engines = 1;
1792                 adev->gfx.config.max_tile_pipes = 2;
1793                 adev->gfx.config.max_cu_per_sh = 6;
1794                 adev->gfx.config.max_sh_per_se = 1;
1795                 adev->gfx.config.max_backends_per_se = 2;
1796                 adev->gfx.config.max_texture_channel_caches = 2;
1797                 adev->gfx.config.max_gprs = 256;
1798                 adev->gfx.config.max_gs_threads = 32;
1799                 adev->gfx.config.max_hw_contexts = 8;
1800
1801                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1806                 break;
1807         case CHIP_FIJI:
1808                 adev->gfx.config.max_shader_engines = 4;
1809                 adev->gfx.config.max_tile_pipes = 16;
1810                 adev->gfx.config.max_cu_per_sh = 16;
1811                 adev->gfx.config.max_sh_per_se = 1;
1812                 adev->gfx.config.max_backends_per_se = 4;
1813                 adev->gfx.config.max_texture_channel_caches = 16;
1814                 adev->gfx.config.max_gprs = 256;
1815                 adev->gfx.config.max_gs_threads = 32;
1816                 adev->gfx.config.max_hw_contexts = 8;
1817
1818                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1823                 break;
1824         case CHIP_POLARIS11:
1825         case CHIP_POLARIS12:
1826                 ret = amdgpu_atombios_get_gfx_info(adev);
1827                 if (ret)
1828                         return ret;
1829                 adev->gfx.config.max_gprs = 256;
1830                 adev->gfx.config.max_gs_threads = 32;
1831                 adev->gfx.config.max_hw_contexts = 8;
1832
1833                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1837                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1838                 break;
1839         case CHIP_POLARIS10:
1840         case CHIP_VEGAM:
1841                 ret = amdgpu_atombios_get_gfx_info(adev);
1842                 if (ret)
1843                         return ret;
1844                 adev->gfx.config.max_gprs = 256;
1845                 adev->gfx.config.max_gs_threads = 32;
1846                 adev->gfx.config.max_hw_contexts = 8;
1847
1848                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1849                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1850                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1851                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1852                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1853                 break;
1854         case CHIP_TONGA:
1855                 adev->gfx.config.max_shader_engines = 4;
1856                 adev->gfx.config.max_tile_pipes = 8;
1857                 adev->gfx.config.max_cu_per_sh = 8;
1858                 adev->gfx.config.max_sh_per_se = 1;
1859                 adev->gfx.config.max_backends_per_se = 2;
1860                 adev->gfx.config.max_texture_channel_caches = 8;
1861                 adev->gfx.config.max_gprs = 256;
1862                 adev->gfx.config.max_gs_threads = 32;
1863                 adev->gfx.config.max_hw_contexts = 8;
1864
1865                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1866                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1867                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1868                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1869                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1870                 break;
1871         case CHIP_CARRIZO:
1872                 adev->gfx.config.max_shader_engines = 1;
1873                 adev->gfx.config.max_tile_pipes = 2;
1874                 adev->gfx.config.max_sh_per_se = 1;
1875                 adev->gfx.config.max_backends_per_se = 2;
1876                 adev->gfx.config.max_cu_per_sh = 8;
1877                 adev->gfx.config.max_texture_channel_caches = 2;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_STONEY:
1889                 adev->gfx.config.max_shader_engines = 1;
1890                 adev->gfx.config.max_tile_pipes = 2;
1891                 adev->gfx.config.max_sh_per_se = 1;
1892                 adev->gfx.config.max_backends_per_se = 1;
1893                 adev->gfx.config.max_cu_per_sh = 3;
1894                 adev->gfx.config.max_texture_channel_caches = 2;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 16;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         default:
1906                 adev->gfx.config.max_shader_engines = 2;
1907                 adev->gfx.config.max_tile_pipes = 4;
1908                 adev->gfx.config.max_cu_per_sh = 2;
1909                 adev->gfx.config.max_sh_per_se = 1;
1910                 adev->gfx.config.max_backends_per_se = 2;
1911                 adev->gfx.config.max_texture_channel_caches = 4;
1912                 adev->gfx.config.max_gprs = 256;
1913                 adev->gfx.config.max_gs_threads = 32;
1914                 adev->gfx.config.max_hw_contexts = 8;
1915
1916                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1917                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1918                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1919                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1920                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1921                 break;
1922         }
1923
1924         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1925         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1926         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1927
1928         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1929         adev->gfx.config.mem_max_burst_length_bytes = 256;
1930         if (adev->flags & AMD_IS_APU) {
1931                 /* Get memory bank mapping mode. */
1932                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1933                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1934                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1935
1936                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1937                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1938                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1939
1940                 /* Validate settings in case only one DIMM installed. */
1941                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1942                         dimm00_addr_map = 0;
1943                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1944                         dimm01_addr_map = 0;
1945                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1946                         dimm10_addr_map = 0;
1947                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1948                         dimm11_addr_map = 0;
1949
1950                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1951                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1952                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1953                         adev->gfx.config.mem_row_size_in_kb = 2;
1954                 else
1955                         adev->gfx.config.mem_row_size_in_kb = 1;
1956         } else {
1957                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1958                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1959                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1960                         adev->gfx.config.mem_row_size_in_kb = 4;
1961         }
1962
1963         adev->gfx.config.shader_engine_tile_size = 32;
1964         adev->gfx.config.num_gpus = 1;
1965         adev->gfx.config.multi_gpu_tile_size = 64;
1966
1967         /* fix up row size */
1968         switch (adev->gfx.config.mem_row_size_in_kb) {
1969         case 1:
1970         default:
1971                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1972                 break;
1973         case 2:
1974                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1975                 break;
1976         case 4:
1977                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1978                 break;
1979         }
1980         adev->gfx.config.gb_addr_config = gb_addr_config;
1981
1982         return 0;
1983 }
1984
1985 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1986                                         int mec, int pipe, int queue)
1987 {
1988         int r;
1989         unsigned irq_type;
1990         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1991
1992         ring = &adev->gfx.compute_ring[ring_id];
1993
1994         /* mec0 is me1 */
1995         ring->me = mec + 1;
1996         ring->pipe = pipe;
1997         ring->queue = queue;
1998
1999         ring->ring_obj = NULL;
2000         ring->use_doorbell = true;
2001         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2002         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2003                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2004         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2005
2006         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2007                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2008                 + ring->pipe;
2009
2010         /* type-2 packets are deprecated on MEC, use type-3 instead */
2011         r = amdgpu_ring_init(adev, ring, 1024,
2012                         &adev->gfx.eop_irq, irq_type);
2013         if (r)
2014                 return r;
2015
2016
2017         return 0;
2018 }
2019
2020 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2021
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 *
 * Registers interrupt sources, loads microcode, allocates RLC/MEC/KIQ
 * buffer objects, creates the gfx and compute rings, and reserves the
 * GDS/GWS/OA partitions. Runs once at driver init before hw_init.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC generation */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* deferred handler for SQ interrupts (work must not run in irq context) */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocates the shared EOP buffer the compute rings slice up below */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to the kernel driver */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2189
/*
 * gfx_v8_0_sw_fini - tear down everything gfx_v8_0_sw_init() created
 *
 * Frees GDS/GWS/OA BOs, finalizes all gfx/compute rings, releases
 * MQD/KIQ/MEC/RLC resources, and drops the firmware references.
 * Teardown order roughly mirrors init in reverse. Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* only Carrizo/Stoney allocate a CP table BO (see rlc init for APUs) */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2223
2224 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2225 {
2226         uint32_t *modearray, *mod2array;
2227         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2228         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2229         u32 reg_offset;
2230
2231         modearray = adev->gfx.config.tile_mode_array;
2232         mod2array = adev->gfx.config.macrotile_mode_array;
2233
2234         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2235                 modearray[reg_offset] = 0;
2236
2237         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2238                 mod2array[reg_offset] = 0;
2239
2240         switch (adev->asic_type) {
2241         case CHIP_TOPAZ:
2242                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                 PIPE_CONFIG(ADDR_SURF_P2) |
2244                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247                                 PIPE_CONFIG(ADDR_SURF_P2) |
2248                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251                                 PIPE_CONFIG(ADDR_SURF_P2) |
2252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255                                 PIPE_CONFIG(ADDR_SURF_P2) |
2256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2258                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                 PIPE_CONFIG(ADDR_SURF_P2) |
2260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2262                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263                                 PIPE_CONFIG(ADDR_SURF_P2) |
2264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                 PIPE_CONFIG(ADDR_SURF_P2) |
2268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2270                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271                                 PIPE_CONFIG(ADDR_SURF_P2));
2272                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                 PIPE_CONFIG(ADDR_SURF_P2) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                                  PIPE_CONFIG(ADDR_SURF_P2) |
2278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281                                  PIPE_CONFIG(ADDR_SURF_P2) |
2282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285                                  PIPE_CONFIG(ADDR_SURF_P2) |
2286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289                                  PIPE_CONFIG(ADDR_SURF_P2) |
2290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2292                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2293                                  PIPE_CONFIG(ADDR_SURF_P2) |
2294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2297                                  PIPE_CONFIG(ADDR_SURF_P2) |
2298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2300                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2301                                  PIPE_CONFIG(ADDR_SURF_P2) |
2302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2305                                  PIPE_CONFIG(ADDR_SURF_P2) |
2306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2309                                  PIPE_CONFIG(ADDR_SURF_P2) |
2310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2313                                  PIPE_CONFIG(ADDR_SURF_P2) |
2314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2317                                  PIPE_CONFIG(ADDR_SURF_P2) |
2318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2320                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2321                                  PIPE_CONFIG(ADDR_SURF_P2) |
2322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2324                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2325                                  PIPE_CONFIG(ADDR_SURF_P2) |
2326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2328                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2329                                  PIPE_CONFIG(ADDR_SURF_P2) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2332                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P2) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P2) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P2) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344
2345                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2346                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348                                 NUM_BANKS(ADDR_SURF_8_BANK));
2349                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2350                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2351                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                 NUM_BANKS(ADDR_SURF_8_BANK));
2353                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356                                 NUM_BANKS(ADDR_SURF_8_BANK));
2357                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360                                 NUM_BANKS(ADDR_SURF_8_BANK));
2361                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                 NUM_BANKS(ADDR_SURF_8_BANK));
2365                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2368                                 NUM_BANKS(ADDR_SURF_8_BANK));
2369                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2372                                 NUM_BANKS(ADDR_SURF_8_BANK));
2373                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376                                 NUM_BANKS(ADDR_SURF_16_BANK));
2377                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380                                 NUM_BANKS(ADDR_SURF_16_BANK));
2381                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2382                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2384                                  NUM_BANKS(ADDR_SURF_16_BANK));
2385                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2386                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2387                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2392                                  NUM_BANKS(ADDR_SURF_16_BANK));
2393                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2396                                  NUM_BANKS(ADDR_SURF_16_BANK));
2397                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                  NUM_BANKS(ADDR_SURF_8_BANK));
2401
2402                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2403                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2404                             reg_offset != 23)
2405                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2406
2407                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2408                         if (reg_offset != 7)
2409                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2410
2411                 break;
2412         case CHIP_FIJI:
2413         case CHIP_VEGAM:
2414                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2417                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2448                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2451                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2461                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2464                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2473                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* Entries 18-26: the THICK/XTHICK array-mode variants (1D, 2D,
                 * 3D and PRT).  All of them use SAMPLE_SPLIT_1; most pair with
                 * THICK micro tiling, 18/24 with THIN.  Pipe config stays
                 * P16_32x32_16x16 except the PRT entries 23, which drop to
                 * P4_16x16. */
2484                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2489                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2493                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2496                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2497                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2500                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2501                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2505                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2517                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Entries 27-30: ROTATED micro-tiling variants; PRT entries
                 * (29/30) use SAMPLE_SPLIT_8, the rest SAMPLE_SPLIT_2. */
2520                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2532                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2536
                /* Secondary (macrotile) table: bank width/height, macro tile
                 * aspect and bank count for each GB_MACROTILE_MODEn register.
                 * This ASIC's table is all 8-bank except entry 14 (4-bank). */
2537                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540                                 NUM_BANKS(ADDR_SURF_8_BANK));
2541                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544                                 NUM_BANKS(ADDR_SURF_8_BANK));
2545                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                 NUM_BANKS(ADDR_SURF_8_BANK));
2549                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552                                 NUM_BANKS(ADDR_SURF_8_BANK));
2553                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556                                 NUM_BANKS(ADDR_SURF_8_BANK));
2557                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                 NUM_BANKS(ADDR_SURF_8_BANK));
2561                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2564                                 NUM_BANKS(ADDR_SURF_8_BANK));
                /* Index 7 is intentionally left out: it is never initialized
                 * here and the write loop below skips it (reg_offset != 7). */
2565                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2567                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568                                 NUM_BANKS(ADDR_SURF_8_BANK));
2569                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572                                 NUM_BANKS(ADDR_SURF_8_BANK));
2573                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576                                  NUM_BANKS(ADDR_SURF_8_BANK));
2577                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580                                  NUM_BANKS(ADDR_SURF_8_BANK));
2581                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584                                  NUM_BANKS(ADDR_SURF_8_BANK));
2585                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588                                  NUM_BANKS(ADDR_SURF_8_BANK));
2589                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592                                  NUM_BANKS(ADDR_SURF_4_BANK));
2593
                /* Program the primary tile-mode table into consecutive
                 * GB_TILE_MODEn registers. */
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2595                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2596
                /* Program the macrotile table, skipping reserved entry 7
                 * (mod2array[7] was never set above). */
2597                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2598                         if (reg_offset != 7)
2599                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2600
2601                 break;
2602         case CHIP_TONGA:
                /* Tonga tile-mode table.  Pipe config is P8_32x32_16x16 for
                 * every entry except the second PRT variant of each group,
                 * which uses P4_16x16.  Layout of the 31 entries:
                 *   0-7   depth micro tiling, tile split 64B..2KB
                 *   8     linear aligned
                 *   9-12  display micro tiling
                 *   13-17 thin micro tiling
                 *   18-26 thick/xthick variants (single sample split)
                 *   27-30 rotated micro tiling */
2603                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2636                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2637                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2648                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2652                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2653                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2662                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2673                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2674                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2678                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2682                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2686                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2690                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2692                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2694                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2702                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2706                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2725
                /* Tonga macrotile table: mostly 16-bank; entry 12 drops to
                 * 8-bank and 13/14 to 4-bank.  Index 7 is reserved and never
                 * initialized or written (see the != 7 check below). */
2726                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2733                                 NUM_BANKS(ADDR_SURF_16_BANK));
2734                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2737                                 NUM_BANKS(ADDR_SURF_16_BANK));
2738                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                 NUM_BANKS(ADDR_SURF_16_BANK));
2742                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                 NUM_BANKS(ADDR_SURF_16_BANK));
2754                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2756                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757                                 NUM_BANKS(ADDR_SURF_16_BANK));
2758                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761                                 NUM_BANKS(ADDR_SURF_16_BANK));
2762                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                  NUM_BANKS(ADDR_SURF_16_BANK));
2766                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769                                  NUM_BANKS(ADDR_SURF_16_BANK));
2770                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773                                  NUM_BANKS(ADDR_SURF_8_BANK));
2774                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2776                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2777                                  NUM_BANKS(ADDR_SURF_4_BANK));
2778                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2781                                  NUM_BANKS(ADDR_SURF_4_BANK));
2782
                /* Push both tables to the hardware registers; macrotile
                 * entry 7 is skipped, matching the uninitialized slot above. */
2783                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2784                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2785
2786                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2787                         if (reg_offset != 7)
2788                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2789
2790                 break;
2791         case CHIP_POLARIS11:
2792         case CHIP_POLARIS12:
                /* Polaris11 and Polaris12 share one table; every entry uses
                 * the P4_16x16 pipe config.  Entry layout mirrors the other
                 * ASICs: 0-7 depth (tile split 64B..2KB), 8 linear, 9-12
                 * display, 13-17 thin, 18-26 thick/xthick, 27-30 rotated. */
2793                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2827                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2836                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2839                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2843                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2876                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2896                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2915
                /* Polaris11/12 macrotile table (mostly 16-bank; entries 8/9
                 * widen BANK_WIDTH to 2).  Index 7 is again left out — it is
                 * never initialized and the write loop that follows this table
                 * skips it. */
2916                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919                                 NUM_BANKS(ADDR_SURF_16_BANK));
2920
2921                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924                                 NUM_BANKS(ADDR_SURF_16_BANK));
2925
2926                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930
2931                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934                                 NUM_BANKS(ADDR_SURF_16_BANK));
2935
2936                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939                                 NUM_BANKS(ADDR_SURF_16_BANK));
2940
2941                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2944                                 NUM_BANKS(ADDR_SURF_16_BANK));
2945
2946                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949                                 NUM_BANKS(ADDR_SURF_16_BANK));
2950
2951                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                 NUM_BANKS(ADDR_SURF_16_BANK));
2955
2956                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                 NUM_BANKS(ADDR_SURF_16_BANK));
2960
2961                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2963                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2964                                 NUM_BANKS(ADDR_SURF_16_BANK));
2965
2966                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2968                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2969                                 NUM_BANKS(ADDR_SURF_16_BANK));
2970
2971                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974                                 NUM_BANKS(ADDR_SURF_16_BANK));
2975
2976                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979                                 NUM_BANKS(ADDR_SURF_8_BANK));
2980
2981                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2984                                 NUM_BANKS(ADDR_SURF_4_BANK));
2985
2986                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2987                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2988
2989                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2990                         if (reg_offset != 7)
2991                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2992
2993                 break;
2994         case CHIP_POLARIS10:
2995                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3029                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3044                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3065                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3078                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3086                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3105                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3109                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3113                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3117
3118                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122
3123                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3125                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                 NUM_BANKS(ADDR_SURF_16_BANK));
3127
3128                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132
3133                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136                                 NUM_BANKS(ADDR_SURF_16_BANK));
3137
3138                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3140                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141                                 NUM_BANKS(ADDR_SURF_16_BANK));
3142
3143                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146                                 NUM_BANKS(ADDR_SURF_16_BANK));
3147
3148                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3151                                 NUM_BANKS(ADDR_SURF_16_BANK));
3152
3153                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3156                                 NUM_BANKS(ADDR_SURF_16_BANK));
3157
3158                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3161                                 NUM_BANKS(ADDR_SURF_16_BANK));
3162
3163                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166                                 NUM_BANKS(ADDR_SURF_16_BANK));
3167
3168                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171                                 NUM_BANKS(ADDR_SURF_16_BANK));
3172
3173                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3176                                 NUM_BANKS(ADDR_SURF_8_BANK));
3177
3178                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3181                                 NUM_BANKS(ADDR_SURF_4_BANK));
3182
3183                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3186                                 NUM_BANKS(ADDR_SURF_4_BANK));
3187
3188                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3189                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3190
3191                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3192                         if (reg_offset != 7)
3193                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3194
3195                 break;
3196         case CHIP_STONEY:
3197                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                 PIPE_CONFIG(ADDR_SURF_P2) |
3199                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3200                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3201                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202                                 PIPE_CONFIG(ADDR_SURF_P2) |
3203                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206                                 PIPE_CONFIG(ADDR_SURF_P2) |
3207                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210                                 PIPE_CONFIG(ADDR_SURF_P2) |
3211                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3212                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214                                 PIPE_CONFIG(ADDR_SURF_P2) |
3215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3218                                 PIPE_CONFIG(ADDR_SURF_P2) |
3219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3222                                 PIPE_CONFIG(ADDR_SURF_P2) |
3223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2));
3227                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2) |
3229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3230                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3231                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3239                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3243                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299
3300                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3301                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                 NUM_BANKS(ADDR_SURF_8_BANK));
3304                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                 NUM_BANKS(ADDR_SURF_8_BANK));
3308                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3311                                 NUM_BANKS(ADDR_SURF_8_BANK));
3312                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315                                 NUM_BANKS(ADDR_SURF_8_BANK));
3316                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319                                 NUM_BANKS(ADDR_SURF_8_BANK));
3320                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323                                 NUM_BANKS(ADDR_SURF_8_BANK));
3324                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                 NUM_BANKS(ADDR_SURF_16_BANK));
3332                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335                                 NUM_BANKS(ADDR_SURF_16_BANK));
3336                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3337                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339                                  NUM_BANKS(ADDR_SURF_16_BANK));
3340                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343                                  NUM_BANKS(ADDR_SURF_16_BANK));
3344                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                  NUM_BANKS(ADDR_SURF_16_BANK));
3348                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                  NUM_BANKS(ADDR_SURF_16_BANK));
3352                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                  NUM_BANKS(ADDR_SURF_8_BANK));
3356
3357                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3358                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3359                             reg_offset != 23)
3360                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3361
3362                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3363                         if (reg_offset != 7)
3364                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3365
3366                 break;
3367         default:
3368                 dev_warn(adev->dev,
3369                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3370                          adev->asic_type);
3371
3372         case CHIP_CARRIZO:
3373                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374                                 PIPE_CONFIG(ADDR_SURF_P2) |
3375                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3376                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3377                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378                                 PIPE_CONFIG(ADDR_SURF_P2) |
3379                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3381                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3382                                 PIPE_CONFIG(ADDR_SURF_P2) |
3383                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3385                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3386                                 PIPE_CONFIG(ADDR_SURF_P2) |
3387                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3389                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390                                 PIPE_CONFIG(ADDR_SURF_P2) |
3391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3394                                 PIPE_CONFIG(ADDR_SURF_P2) |
3395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3398                                 PIPE_CONFIG(ADDR_SURF_P2) |
3399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2));
3403                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2) |
3405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3406                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3407                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3408                                  PIPE_CONFIG(ADDR_SURF_P2) |
3409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3411                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3412                                  PIPE_CONFIG(ADDR_SURF_P2) |
3413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3415                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3416                                  PIPE_CONFIG(ADDR_SURF_P2) |
3417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3419                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3420                                  PIPE_CONFIG(ADDR_SURF_P2) |
3421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3435                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3439                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3440                                  PIPE_CONFIG(ADDR_SURF_P2) |
3441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3443                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3444                                  PIPE_CONFIG(ADDR_SURF_P2) |
3445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3447                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3448                                  PIPE_CONFIG(ADDR_SURF_P2) |
3449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3452                                  PIPE_CONFIG(ADDR_SURF_P2) |
3453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3456                                  PIPE_CONFIG(ADDR_SURF_P2) |
3457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3460                                  PIPE_CONFIG(ADDR_SURF_P2) |
3461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3464                                  PIPE_CONFIG(ADDR_SURF_P2) |
3465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3467                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3468                                  PIPE_CONFIG(ADDR_SURF_P2) |
3469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3471                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3472                                  PIPE_CONFIG(ADDR_SURF_P2) |
3473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3475
3476                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479                                 NUM_BANKS(ADDR_SURF_8_BANK));
3480                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483                                 NUM_BANKS(ADDR_SURF_8_BANK));
3484                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3487                                 NUM_BANKS(ADDR_SURF_8_BANK));
3488                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491                                 NUM_BANKS(ADDR_SURF_8_BANK));
3492                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495                                 NUM_BANKS(ADDR_SURF_8_BANK));
3496                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3499                                 NUM_BANKS(ADDR_SURF_8_BANK));
3500                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503                                 NUM_BANKS(ADDR_SURF_8_BANK));
3504                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507                                 NUM_BANKS(ADDR_SURF_16_BANK));
3508                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511                                 NUM_BANKS(ADDR_SURF_16_BANK));
3512                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3513                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3514                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3515                                  NUM_BANKS(ADDR_SURF_16_BANK));
3516                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3517                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3518                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3519                                  NUM_BANKS(ADDR_SURF_16_BANK));
3520                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3522                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523                                  NUM_BANKS(ADDR_SURF_16_BANK));
3524                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527                                  NUM_BANKS(ADDR_SURF_16_BANK));
3528                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531                                  NUM_BANKS(ADDR_SURF_8_BANK));
3532
3533                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3534                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3535                             reg_offset != 23)
3536                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3537
3538                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3539                         if (reg_offset != 7)
3540                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3541
3542                 break;
3543         }
3544 }
3545
3546 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3547                                   u32 se_num, u32 sh_num, u32 instance)
3548 {
3549         u32 data;
3550
3551         if (instance == 0xffffffff)
3552                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3553         else
3554                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3555
3556         if (se_num == 0xffffffff)
3557                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3558         else
3559                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3560
3561         if (sh_num == 0xffffffff)
3562                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3563         else
3564                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3565
3566         WREG32(mmGRBM_GFX_INDEX, data);
3567 }
3568
/* Route banked register access to a specific ME/pipe/queue (VMID 0). */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3574
3575 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3576 {
3577         u32 data, mask;
3578
3579         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3580                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3581
3582         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3583
3584         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3585                                          adev->gfx.config.max_sh_per_se);
3586
3587         return (~data) & mask;
3588 }
3589
3590 static void
3591 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3592 {
3593         switch (adev->asic_type) {
3594         case CHIP_FIJI:
3595         case CHIP_VEGAM:
3596                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3597                           RB_XSEL2(1) | PKR_MAP(2) |
3598                           PKR_XSEL(1) | PKR_YSEL(1) |
3599                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3600                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3601                            SE_PAIR_YSEL(2);
3602                 break;
3603         case CHIP_TONGA:
3604         case CHIP_POLARIS10:
3605                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3606                           SE_XSEL(1) | SE_YSEL(1);
3607                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3608                            SE_PAIR_YSEL(2);
3609                 break;
3610         case CHIP_TOPAZ:
3611         case CHIP_CARRIZO:
3612                 *rconf |= RB_MAP_PKR0(2);
3613                 *rconf1 |= 0x0;
3614                 break;
3615         case CHIP_POLARIS11:
3616         case CHIP_POLARIS12:
3617                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3618                           SE_XSEL(1) | SE_YSEL(1);
3619                 *rconf1 |= 0x0;
3620                 break;
3621         case CHIP_STONEY:
3622                 *rconf |= 0x0;
3623                 *rconf1 |= 0x0;
3624                 break;
3625         default:
3626                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3627                 break;
3628         }
3629 }
3630
/*
 * gfx_v8_0_write_harvested_raster_configs - patch raster config for
 * harvested render backends
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value for this ASIC
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: total number of RB pipes to consider
 *
 * When some RBs are fused off, the default SE/PKR/RB routing in the
 * raster config would direct work at missing backends.  This walks each
 * shader engine, patches the SE_MAP/PKR_MAP/RB_MAP fields to point only
 * at present RBs, and writes the per-SE result via GRBM_GFX_INDEX
 * selection.  Broadcast mode is restored before returning.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of the global RB enable bitmap (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* Only these topologies are expected on VI parts. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is empty, remap SE_PAIR_MAP to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If this SE's pair partner is empty, remap SE_MAP. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* If one packer has no live RBs, route all work to the other. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Patch RB_MAP_PKR0 if one of packer 0's RBs is missing. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* Same for packer 1, when it exists. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3739
/*
 * gfx_v8_0_setup_rb - discover and program the render backend layout
 *
 * Walks every SE/SH to build a bitmap of active RBs, then writes either
 * the default or (when RBs are harvested) a patched raster configuration,
 * and finally caches the per-SE/SH RB registers for userspace queries.
 * Caller context: holds grbm_idx_mutex internally around all banked
 * register access.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Number of bitmap bits each SH contributes to active_rbs. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather the active-RB bitmap from every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Full RB complement: broadcast the default config.  Otherwise
	 * patch the routing around the harvested backends.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3796
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program VMIDs 8..15, reserved for compute (KFD) use. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably disables the APE1
		 * aperture for these VMIDs; confirm against HW docs. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore SRBM to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3841
3842 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3843 {
3844         switch (adev->asic_type) {
3845         default:
3846                 adev->gfx.config.double_offchip_lds_buf = 1;
3847                 break;
3848         case CHIP_CARRIZO:
3849         case CHIP_STONEY:
3850                 adev->gfx.config.double_offchip_lds_buf = 0;
3851                 break;
3852         }
3853 }
3854
/*
 * gfx_v8_0_gpu_init - one-time GFX golden/static register setup
 *
 * Programs address config, tiling tables, RB routing, per-VMID SH_MEM
 * apertures and the broadcast PA_SC/SPI defaults.  Ordering matters:
 * tiling and RB setup must precede CU info gathering and the SH_MEM
 * programming below.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_* for every graphics VMID. */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default MTYPE, zero bases. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* User VMIDs: non-coherent default, shared aperture base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit — presumably leaves APE1 disabled. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Equal arbitration priority for all four pipe order timestamps. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3936
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * Polls the per-CU master busy register on every SE/SH (up to
 * adev->usec_timeout microseconds each), then the non-CU masters.
 * On timeout it logs and returns early; there is no error return.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast and drop the lock
				 * before bailing out. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters (SE/GC/TC0/TC1). */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3974
3975 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3976                                                bool enable)
3977 {
3978         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3979
3980         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3981         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3982         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3983         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3984
3985         WREG32(mmCP_INT_CNTL_RING0, tmp);
3986 }
3987
3988 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3989 {
3990         /* csib */
3991         WREG32(mmRLC_CSIB_ADDR_HI,
3992                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3993         WREG32(mmRLC_CSIB_ADDR_LO,
3994                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3995         WREG32(mmRLC_CSIB_LENGTH,
3996                         adev->gfx.rlc.clear_state_size);
3997 }
3998
3999 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4000                                 int ind_offset,
4001                                 int list_size,
4002                                 int *unique_indices,
4003                                 int *indices_count,
4004                                 int max_indices,
4005                                 int *ind_start_offsets,
4006                                 int *offset_count,
4007                                 int max_offset)
4008 {
4009         int indices;
4010         bool new_entry = true;
4011
4012         for (; ind_offset < list_size; ind_offset++) {
4013
4014                 if (new_entry) {
4015                         new_entry = false;
4016                         ind_start_offsets[*offset_count] = ind_offset;
4017                         *offset_count = *offset_count + 1;
4018                         BUG_ON(*offset_count >= max_offset);
4019                 }
4020
4021                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4022                         new_entry = true;
4023                         continue;
4024                 }
4025
4026                 ind_offset += 2;
4027
4028                 /* look for the matching indice */
4029                 for (indices = 0;
4030                         indices < *indices_count;
4031                         indices++) {
4032                         if (unique_indices[indices] ==
4033                                 register_list_format[ind_offset])
4034                                 break;
4035                 }
4036
4037                 if (indices >= *indices_count) {
4038                         unique_indices[*indices_count] =
4039                                 register_list_format[ind_offset];
4040                         indices = *indices_count;
4041                         *indices_count = *indices_count + 1;
4042                         BUG_ON(*indices_count >= max_indices);
4043                 }
4044
4045                 register_list_format[ind_offset] = indices;
4046         }
4047 }
4048
4049 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4050 {
4051         int i, temp, data;
4052         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4053         int indices_count = 0;
4054         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4055         int offset_count = 0;
4056
4057         int list_size;
4058         unsigned int *register_list_format =
4059                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4060         if (!register_list_format)
4061                 return -ENOMEM;
4062         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4063                         adev->gfx.rlc.reg_list_format_size_bytes);
4064
4065         gfx_v8_0_parse_ind_reg_list(register_list_format,
4066                                 RLC_FormatDirectRegListLength,
4067                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4068                                 unique_indices,
4069                                 &indices_count,
4070                                 ARRAY_SIZE(unique_indices),
4071                                 indirect_start_offsets,
4072                                 &offset_count,
4073                                 ARRAY_SIZE(indirect_start_offsets));
4074
4075         /* save and restore list */
4076         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4077
4078         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4079         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4080                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4081
4082         /* indirect list */
4083         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4084         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4085                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4086
4087         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4088         list_size = list_size >> 1;
4089         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4090         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4091
4092         /* starting offsets starts */
4093         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4094                 adev->gfx.rlc.starting_offsets_start);
4095         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4096                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4097                                 indirect_start_offsets[i]);
4098
4099         /* unique indices */
4100         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4101         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4102         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4103                 if (unique_indices[i] != 0) {
4104                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4105                         WREG32(data + i, unique_indices[i] >> 20);
4106                 }
4107         }
4108         kfree(register_list_format);
4109
4110         return 0;
4111 }
4112
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4117
/*
 * gfx_v8_0_init_power_gating - program static powergating timing values.
 *
 * Sets the RB write-pointer idle poll count, the four RLC_PG_DELAY
 * sub-delays, the serdes command delay and the GFX-idle threshold after
 * which GRBM registers are saved.  All values are fixed hardware tuning
 * constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4134
/* Enable/disable SMU clock slowdown while powering a block up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4140
/* Enable/disable SMU clock slowdown while powering a block down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4146
/* Enable/disable CP powergating; note the hardware field is a DISABLE
 * bit, so the value written is inverted relative to @enable. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4151
4152 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4153 {
4154         if ((adev->asic_type == CHIP_CARRIZO) ||
4155             (adev->asic_type == CHIP_STONEY)) {
4156                 gfx_v8_0_init_csb(adev);
4157                 gfx_v8_0_init_save_restore_list(adev);
4158                 gfx_v8_0_enable_save_restore_machine(adev);
4159                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4160                 gfx_v8_0_init_power_gating(adev);
4161                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4162         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4163                    (adev->asic_type == CHIP_POLARIS12) ||
4164                    (adev->asic_type == CHIP_VEGAM)) {
4165                 gfx_v8_0_init_csb(adev);
4166                 gfx_v8_0_init_save_restore_list(adev);
4167                 gfx_v8_0_enable_save_restore_machine(adev);
4168                 gfx_v8_0_init_power_gating(adev);
4169         }
4170
4171 }
4172
/* Halt the RLC F32 core, mask the GUI idle interrupt and wait for any
 * outstanding serdes transactions to drain. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4180
/* Pulse the GRBM soft reset line for the RLC, with settle delays on
 * both the assert and the de-assert. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4189
/* Re-enable the RLC F32 core and (on dGPUs) the GUI idle interrupt. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. Carrizo) enable the CP interrupt only after CP init */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4200
4201 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4202 {
4203         const struct rlc_firmware_header_v2_0 *hdr;
4204         const __le32 *fw_data;
4205         unsigned i, fw_size;
4206
4207         if (!adev->gfx.rlc_fw)
4208                 return -EINVAL;
4209
4210         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4211         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4212
4213         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4214                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4215         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4216
4217         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4218         for (i = 0; i < fw_size; i++)
4219                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4220         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4221
4222         return 0;
4223 }
4224
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up sequence.
 *
 * Stops the RLC, disables CGCG/CGLS clockgating (plus the 3D variant on
 * Polaris/VegaM), clears the powergating control register, soft-resets
 * the RLC, programs the powergating state, optionally uploads the RLC
 * microcode (direct/legacy load path only) and finally restarts the RLC.
 *
 * Returns 0 on success or the microcode-load error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		/* also clear the 3D CGCG/CGLS enable bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4264
4265 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4266 {
4267         int i;
4268         u32 tmp = RREG32(mmCP_ME_CNTL);
4269
4270         if (enable) {
4271                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4272                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4273                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4274         } else {
4275                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4276                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4277                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4278                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4279                         adev->gfx.gfx_ring[i].ready = false;
4280         }
4281         WREG32(mmCP_ME_CNTL, tmp);
4282         udelay(50);
4283 }
4284
4285 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4286 {
4287         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4288         const struct gfx_firmware_header_v1_0 *ce_hdr;
4289         const struct gfx_firmware_header_v1_0 *me_hdr;
4290         const __le32 *fw_data;
4291         unsigned i, fw_size;
4292
4293         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4294                 return -EINVAL;
4295
4296         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4297                 adev->gfx.pfp_fw->data;
4298         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4299                 adev->gfx.ce_fw->data;
4300         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4301                 adev->gfx.me_fw->data;
4302
4303         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4304         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4305         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4306
4307         gfx_v8_0_cp_gfx_enable(adev, false);
4308
4309         /* PFP */
4310         fw_data = (const __le32 *)
4311                 (adev->gfx.pfp_fw->data +
4312                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4313         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4314         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4315         for (i = 0; i < fw_size; i++)
4316                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4317         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4318
4319         /* CE */
4320         fw_data = (const __le32 *)
4321                 (adev->gfx.ce_fw->data +
4322                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4323         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4324         WREG32(mmCP_CE_UCODE_ADDR, 0);
4325         for (i = 0; i < fw_size; i++)
4326                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4327         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4328
4329         /* ME */
4330         fw_data = (const __le32 *)
4331                 (adev->gfx.me_fw->data +
4332                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4333         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4334         WREG32(mmCP_ME_RAM_WADDR, 0);
4335         for (i = 0; i < fw_size; i++)
4336                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4337         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4338
4339         return 0;
4340 }
4341
4342 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4343 {
4344         u32 count = 0;
4345         const struct cs_section_def *sect = NULL;
4346         const struct cs_extent_def *ext = NULL;
4347
4348         /* begin clear state */
4349         count += 2;
4350         /* context control state */
4351         count += 3;
4352
4353         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4354                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4355                         if (sect->id == SECT_CONTEXT)
4356                                 count += 2 + ext->reg_count;
4357                         else
4358                                 return 0;
4359                 }
4360         }
4361         /* pa_sc_raster_config/pa_sc_raster_config1 */
4362         count += 4;
4363         /* end clear state */
4364         count += 2;
4365         /* clear state */
4366         count += 2;
4367
4368         return count;
4369 }
4370
/*
 * gfx_v8_0_cp_gfx_start - bring up the gfx CP and emit the clear state.
 *
 * Programs the basic CP config registers, un-halts the CP, then submits
 * the clear-state packet stream on gfx ring 0: preamble begin, context
 * control, one SET_CONTEXT_REG packet per context extent, the raster
 * config pair, preamble end, CLEAR_STATE and the CE partition bases.
 * The ring allocation size must match gfx_v8_0_get_csb_size() plus the
 * extra packets emitted here.
 *
 * Returns 0 on success or the ring-lock error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the golden CS data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config pair from the harvested RB configuration */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * gfx_v8_0_set_cpg_door_bell - configure the gfx CP ring doorbell.
 *
 * Programs the doorbell offset/enable for @ring and, on dGPUs, the
 * doorbell aperture range for the gfx ring.  Iceland (Topaz) has no gfx
 * doorbells at all, so the function is a no-op there.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers only exist on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4467
/*
 * gfx_v8_0_cp_gfx_resume - program and start gfx ring 0.
 *
 * Sets up the ring buffer size/pointers, the rptr/wptr writeback
 * addresses, the ring base address and the doorbell, then emits the
 * clear state via gfx_v8_0_cp_gfx_start() and ring-tests the result.
 *
 * Returns 0 on success or the ring-test error code (the ring is marked
 * not ready on failure).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before re-arming the ring control */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is stored in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4525
4526 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4527 {
4528         int i;
4529
4530         if (enable) {
4531                 WREG32(mmCP_MEC_CNTL, 0);
4532         } else {
4533                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4534                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4535                         adev->gfx.compute_ring[i].ready = false;
4536                 adev->gfx.kiq.ring.ready = false;
4537         }
4538         udelay(50);
4539 }
4540
/*
 * gfx_v8_0_cp_compute_load_microcode - legacy (register-based) MEC load.
 *
 * Halts the compute CP and streams the MEC1 image into its ucode RAM;
 * MEC2 is loaded afterwards only when a separate MEC2 blob exists.
 * Returns 0 on success, -EINVAL when the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4586
/* KIQ functions */
/*
 * gfx_v8_0_kiq_setting - tell the RLC which queue is the KIQ.
 *
 * Encodes @ring's me/pipe/queue into the low byte of RLC_CP_SCHEDULERS,
 * then writes it a second time with bit 7 set to activate the selection.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4601
/*
 * gfx_v8_0_kiq_kcq_enable - map all compute queues through the KIQ.
 *
 * Builds the mask of usable MEC queues, then submits a SET_RESOURCES
 * packet followed by one MAP_QUEUES packet per compute ring on the KIQ
 * ring.  Completion is detected by the KIQ writing a magic value to a
 * scratch register at the end of the packet stream.
 *
 * Returns 0 on success, or a negative error code on scratch/ring
 * allocation failure or completion timeout.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 3 + 1 for the
	 * completion write + header */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the stream */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4687
/*
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD.
 *
 * If the HQD is active, writes @req to CP_HQD_DEQUEUE_REQUEST and polls
 * until the active bit clears.  Always clears the dequeue request and
 * the PQ read/write pointers afterwards.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4708
/*
 * gfx_v8_0_mqd_init - populate the Memory Queue Descriptor for a compute ring
 *
 * @ring: the compute/KIQ ring whose MQD (ring->mqd_ptr) is filled in
 *
 * Builds the MQD from the ring's state (ring buffer, EOP buffer, doorbell,
 * wb addresses) and from the current values of the CP_HQD_* registers.
 * Callers invoke this under srbm_mutex with vi_srbm_select() pointing at
 * the ring's me/pipe/queue, so the RREG32() reads below sample that
 * queue's register bank.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* point the CP at the dynamic_cu_mask stored in the same allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base is programmed in units of 256 bytes, hence the >> 8 */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: capture the remaining fields from the current HW state */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4851
/*
 * gfx_v8_0_mqd_commit - program a prepared MQD into the HQD register file
 *
 * @adev: amdgpu device pointer
 * @mqd: MQD previously filled by gfx_v8_0_mqd_init()
 *
 * Writes the MQD image into the CP_MQD_*/CP_HQD_* registers of the queue
 * currently selected via SRBM.  The register file is treated as a flat
 * array indexed relative to mmCP_MQD_BASE_ADDR.  The EOP RPTR/WPTR group
 * is handled separately because of a Tonga errata, and CP_HQD_ACTIVE is
 * written last so the queue only activates once fully programmed.
 *
 * Non-static: referenced outside this file — presumably by the KIQ/KFD
 * paths (TODO confirm against callers).
 *
 * Returns 0 (cannot fail).
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* program the remaining registers after the EOP group */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD (CP_HQD_ACTIVE is the last register in this range) */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4888
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ ring's queue
 *
 * @ring: the KIQ ring
 *
 * On GPU reset, the MQD is restored from the backup copy and re-committed
 * to hardware; on first init, a fresh MQD is built, committed, and backed
 * up.  The KIQ backup lives at index AMDGPU_MAX_COMPUTE_RINGS, past the
 * per-KCQ slots.  SRBM selection is serialized via srbm_mutex.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* first init: build the MQD from scratch with all CUs/RBs enabled */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so GPU reset can restore the exact same MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4927
/*
 * gfx_v8_0_kcq_init_queue - initialize (or restore) a compute queue's MQD
 *
 * @ring: a KCQ (compute) ring
 *
 * Unlike the KIQ path, the MQD is only initialized here, not committed:
 * the KIQ maps KCQs to hardware later (see gfx_v8_0_kiq_kcq_enable()).
 * First init builds and backs up a fresh MQD; GPU reset restores from the
 * backup; resume-from-suspend only clears the ring buffer.
 *
 * Returns 0 (cannot fail).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index = position of this ring in the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: MQD contents are kept, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4958
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 *
 * On ASICs newer than Tonga, restrict the CP MEC doorbell window to the
 * KIQ..MEC_RING7 range (doorbell indices are dword-based, hence << 2 to
 * get a byte offset), then globally enable doorbells in CP_PQ_STATUS.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4968
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all compute (KCQ) rings
 *
 * For each ring: reserve and kmap its MQD BO, initialize the MQD, then
 * unmap.  Afterwards the doorbell range is programmed, the KIQ is asked
 * to map all KCQs, and every ring is ring-tested.  A ring that fails its
 * test has ring->ready cleared; a KIQ test failure aborts the whole
 * resume.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	/* the KIQ must be set up first: it maps the KCQs below */
	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* initialize the MQD of every compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* have the KIQ map all KCQs onto hardware queues */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs: a failure only disables that ring, not the others */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
5036
/*
 * gfx_v8_0_cp_resume - bring up the GFX and compute command processors
 *
 * Optionally loads the CP microcode (legacy/direct loading only), then
 * resumes the GFX ring and the KIQ/KCQ rings, re-enabling the GUI idle
 * interrupt at the end.
 *
 * Returns 0 on success or a negative error code from any resume step.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
5067
/* Enable or disable the GFX and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5073
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * Applies the golden register settings, performs base GPU init, then
 * brings up the RLC followed by the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the CP depends on the RLC being up first */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5090
/*
 * gfx_v8_0_kcq_disable - ask the KIQ to unmap (reset) one compute queue
 *
 * @kiq_ring: the KIQ ring used to submit the request
 * @ring: the KCQ to unmap, identified by its doorbell offset
 *
 * Emits a PACKET3_UNMAP_QUEUES (RESET_QUEUES action) followed by a scratch
 * register write, then polls the scratch register to confirm the KIQ
 * processed the packet.  10 dwords are allocated: 6 for UNMAP_QUEUES,
 * 3 for SET_UCONFIG_REG, one spare.
 *
 * Returns 0 on success, -EINVAL on handshake timeout, or a negative error
 * from scratch/ring allocation.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has written the completion marker */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
5141
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * Releases GFX interrupt references, unmaps all KCQs via the KIQ so the
 * CPC stops touching memory that is about to become invalid, then (bare
 * metal only) stops the CP and RLC and ungates GFX powergating.  Under
 * SR-IOV the host owns CP/RLC state, so the function returns early.
 *
 * Returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	/* NOTE(review): the return value of gfx_v8_0_kcq_disable() is ignored,
	 * so a failed unmap is not reported to the caller — confirm intentional.
	 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_UNGATE);

	return 0;
}
5171
5172 static int gfx_v8_0_suspend(void *handle)
5173 {
5174         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5175         adev->gfx.in_suspend = true;
5176         return gfx_v8_0_hw_fini(adev);
5177 }
5178
5179 static int gfx_v8_0_resume(void *handle)
5180 {
5181         int r;
5182         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5183
5184         r = gfx_v8_0_hw_init(adev);
5185         adev->gfx.in_suspend = false;
5186         return r;
5187 }
5188
5189 static bool gfx_v8_0_is_idle(void *handle)
5190 {
5191         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5192
5193         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5194                 return false;
5195         else
5196                 return true;
5197 }
5198
5199 static int gfx_v8_0_wait_for_idle(void *handle)
5200 {
5201         unsigned i;
5202         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5203
5204         for (i = 0; i < adev->usec_timeout; i++) {
5205                 if (gfx_v8_0_is_idle(handle))
5206                         return 0;
5207
5208                 udelay(1);
5209         }
5210         return -ETIMEDOUT;
5211 }
5212
/*
 * gfx_v8_0_check_soft_reset - determine whether a GFX soft reset is needed
 *
 * Samples GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and accumulates the
 * corresponding GRBM/SRBM SOFT_RESET_* bits for every busy unit found.
 * The resulting masks are cached in adev->gfx.grbm_soft_reset and
 * adev->gfx.srbm_soft_reset for the pre/soft/post reset callbacks below.
 *
 * Returns true if any reset bit was set, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy graphics unit requires a CP+GFX (+GRBM) reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and the three CP micro-engines (CPF/CPC/CPG) */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests and semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5274
5275 static int gfx_v8_0_pre_soft_reset(void *handle)
5276 {
5277         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5278         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5279
5280         if ((!adev->gfx.grbm_soft_reset) &&
5281             (!adev->gfx.srbm_soft_reset))
5282                 return 0;
5283
5284         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5285         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5286
5287         /* stop the rlc */
5288         gfx_v8_0_rlc_stop(adev);
5289
5290         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5291             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5292                 /* Disable GFX parsing/prefetching */
5293                 gfx_v8_0_cp_gfx_enable(adev, false);
5294
5295         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5296             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5297             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5298             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5299                 int i;
5300
5301                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5302                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5303
5304                         mutex_lock(&adev->srbm_mutex);
5305                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5306                         gfx_v8_0_deactivate_hqd(adev, 2);
5307                         vi_srbm_select(adev, 0, 0, 0, 0);
5308                         mutex_unlock(&adev->srbm_mutex);
5309                 }
5310                 /* Disable MEC parsing/prefetching */
5311                 gfx_v8_0_cp_compute_enable(adev, false);
5312         }
5313
5314        return 0;
5315 }
5316
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits
 *
 * Applies the reset masks cached by gfx_v8_0_check_soft_reset().  GFX
 * memory traffic is stalled (GMCON_DEBUG GFX_STALL/GFX_CLEAR) around the
 * reset pulses.  Each reset register is written with the bits set, read
 * back to post the write, held for 50us, then cleared and read back
 * again.
 *
 * Returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX memory traffic while the reset is in flight */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to make sure the write has landed */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to make sure the write has landed */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5378
5379 static int gfx_v8_0_post_soft_reset(void *handle)
5380 {
5381         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5382         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5383
5384         if ((!adev->gfx.grbm_soft_reset) &&
5385             (!adev->gfx.srbm_soft_reset))
5386                 return 0;
5387
5388         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5389         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5390
5391         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5392             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5393                 gfx_v8_0_cp_gfx_resume(adev);
5394
5395         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5396             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5397             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5398             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5399                 int i;
5400
5401                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5402                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5403
5404                         mutex_lock(&adev->srbm_mutex);
5405                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5406                         gfx_v8_0_deactivate_hqd(adev, 2);
5407                         vi_srbm_select(adev, 0, 0, 0, 0);
5408                         mutex_unlock(&adev->srbm_mutex);
5409                 }
5410                 gfx_v8_0_kiq_resume(adev);
5411         }
5412         gfx_v8_0_rlc_start(adev);
5413
5414         return 0;
5415 }
5416
5417 /**
5418  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5419  *
5420  * @adev: amdgpu_device pointer
5421  *
5422  * Fetches a GPU clock counter snapshot.
5423  * Returns the 64 bit clock counter snapshot.
5424  */
5425 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5426 {
5427         uint64_t clock;
5428
5429         mutex_lock(&adev->gfx.gpu_clock_mutex);
5430         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5431         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5432                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5433         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5434         return clock;
5435 }
5436
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the per-VMID GDS/GWS/OA setup
 *
 * @ring: ring to emit on
 * @vmid: VMID whose GDS registers are programmed
 * @gds_base/@gds_size: GDS partition (byte values, scaled by AMDGPU_GDS_SHIFT)
 * @gws_base/@gws_size: GWS partition (scaled by AMDGPU_GWS_SHIFT)
 * @oa_base/@oa_size: OA partition (scaled by AMDGPU_OA_SHIFT)
 *
 * Emits four WRITE_DATA packets that program this VMID's GDS base/size,
 * GWS allocation and OA mask registers (offsets taken from
 * amdgpu_gds_reg_offset[vmid]).
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into the units the registers expect */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into a single register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5484
5485 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5486 {
5487         WREG32(mmSQ_IND_INDEX,
5488                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5489                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5490                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5491                 (SQ_IND_INDEX__FORCE_READ_MASK));
5492         return RREG32(mmSQ_IND_DATA);
5493 }
5494
5495 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5496                            uint32_t wave, uint32_t thread,
5497                            uint32_t regno, uint32_t num, uint32_t *out)
5498 {
5499         WREG32(mmSQ_IND_INDEX,
5500                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5501                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5502                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5503                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5504                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5505                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5506         while (num--)
5507                 *(out++) = RREG32(mmSQ_IND_DATA);
5508 }
5509
5510 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5511 {
5512         /* type 0 wave data */
5513         dst[(*no_fields)++] = 0;
5514         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5515         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5516         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5517         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5518         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5519         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5520         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5521         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5522         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5523         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5524         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5525         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5526         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5527         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5528         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5529         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5530         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5531         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5532 }
5533
5534 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5535                                      uint32_t wave, uint32_t start,
5536                                      uint32_t size, uint32_t *dst)
5537 {
5538         wave_read_regs(
5539                 adev, simd, wave, 0,
5540                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5541 }
5542
5543
/* GFX IP callback table: clock counter, SE/SH selection and wave debug hooks */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5551
5552 static int gfx_v8_0_early_init(void *handle)
5553 {
5554         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5555
5556         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5557         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5558         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5559         gfx_v8_0_set_ring_funcs(adev);
5560         gfx_v8_0_set_irq_funcs(adev);
5561         gfx_v8_0_set_gds_init(adev);
5562         gfx_v8_0_set_rlc_funcs(adev);
5563
5564         return 0;
5565 }
5566
5567 static int gfx_v8_0_late_init(void *handle)
5568 {
5569         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5570         int r;
5571
5572         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5573         if (r)
5574                 return r;
5575
5576         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5577         if (r)
5578                 return r;
5579
5580         /* requires IBs so do in late init after IB pool is initialized */
5581         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5582         if (r)
5583                 return r;
5584
5585         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5586         if (r) {
5587                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5588                 return r;
5589         }
5590
5591         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5592         if (r) {
5593                 DRM_ERROR(
5594                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5595                         r);
5596                 return r;
5597         }
5598
5599         return 0;
5600 }
5601
5602 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5603                                                        bool enable)
5604 {
5605         if (((adev->asic_type == CHIP_POLARIS11) ||
5606             (adev->asic_type == CHIP_POLARIS12) ||
5607             (adev->asic_type == CHIP_VEGAM)) &&
5608             adev->powerplay.pp_funcs->set_powergating_by_smu)
5609                 /* Send msg to SMU via Powerplay */
5610                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5611
5612         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5613 }
5614
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL.DYN_PER_CU_PG_ENABLE. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5620
/* Toggle quick power gating (Polaris11 family) via RLC_PG_CNTL.QUICK_PG_ENABLE. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5626
/* Toggle GFX coarse-grain power gating (Carrizo/Stoney) in RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5632
/* Toggle GFX pipeline power gating (Carrizo/Stoney) in RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5642
5643 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5644                                           bool enable)
5645 {
5646         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5647                 cz_enable_gfx_cg_power_gating(adev, true);
5648                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5649                         cz_enable_gfx_pipeline_power_gating(adev, true);
5650         } else {
5651                 cz_enable_gfx_cg_power_gating(adev, false);
5652                 cz_enable_gfx_pipeline_power_gating(adev, false);
5653         }
5654 }
5655
/*
 * gfx_v8_0_set_powergating_state - amd_ip_funcs powergating entry point
 *
 * Applies the per-ASIC power-gating configuration for @state.  Each
 * feature is enabled only when both the corresponding AMD_PG_SUPPORT_*
 * flag is set and gating was requested; otherwise it is explicitly
 * disabled.  No-op under SR-IOV (the host owns PG).  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SMU handshake slow-down applies whether gating or not */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other VI parts have no GFX power gating to configure */
		break;
	}

	return 0;
}
5717
/*
 * gfx_v8_0_get_clockgating_state - report active GFX clock-gating features
 *
 * Reads the live gating registers and ORs the matching AMD_CG_SUPPORT_*
 * bits into *@flags.  Under SR-IOV *@flags is cleared first (register
 * reads may not reflect the host's configuration) but the reads still run.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: override bit clear means MGCG is active */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS: RLC memory light sleep implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS: CP memory light sleep implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5759
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command over the RLC serdes bus
 *
 * Broadcasts to all SEs/SHs/CUs, then rewrites RLC_SERDES_WR_CTRL so that
 * only the fields for this command remain set: @cmd in BPM_DATA, @reg_addr
 * in REG_ADDR, all BPM units addressed (0xff), plus RSVD_BPM_ADDR.
 * Stoney keeps its reset BPM_DATA/REG_ADDR bits (its mask list omits them);
 * all other ASICs clear those fields too before ORing in the new values.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: select every SE/SH/instance */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5800
/* RLC safe-mode handshake: message codes and RLC_GPR_REG2 field layout
 * (these field macros are not provided by the generated register headers).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5807
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before CG changes
 *
 * Does nothing when the RLC F32 core is disabled or neither CGCG nor MGCG
 * is supported.  Otherwise writes the enter-safe-mode command (CMD bit +
 * message 1) to RLC_SAFE_MODE, polls RLC_GPM_STAT until both GFX clock and
 * power report "on", then polls for the RLC to clear the CMD bit as its
 * acknowledgement, and records the state in adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power to both report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* RLC clears CMD once the request has been accepted */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5841
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode after CG changes
 *
 * Mirror of iceland_enter_rlc_safe_mode(): bails out if the RLC F32 core
 * is off, then (when CGCG/MGCG is supported and we previously entered
 * safe mode) writes the exit command (CMD bit, message 0) and clears the
 * in_safe_mode flag.  The final poll waits for the RLC to acknowledge by
 * clearing the CMD bit and runs unconditionally, which is harmless when
 * no command was written.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5866
/* RLC safe-mode callbacks shared by all VI-generation ASICs */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5871
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 *
 * Runs the documented enable/disable sequences under RLC safe mode.  The
 * numbered comments mirror the hardware programming guide order, which
 * must not be rearranged: overrides are cleared/set through the serdes
 * command interface only after the serdes masters report idle.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the override bits
		 * (APUs keep GRBM overridden)
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5975
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 *
 * Runs the coarse-grain clock-gating enable/disable sequence under RLC
 * safe mode.  Like the MGCG path, overrides are driven through the BPM
 * serdes commands and every serdes step is bracketed by waits for the
 * CU/non-CU serdes masters to go idle; the ordering is required by the
 * hardware sequence and must not be rearranged.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (multiple reads to be safe) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6068 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6069                                             bool enable)
6070 {
6071         if (enable) {
6072                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6073                  * ===  MGCG + MGLS + TS(CG/LS) ===
6074                  */
6075                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6076                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6077         } else {
6078                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6079                  * ===  CGCG + CGLS ===
6080                  */
6081                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6082                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6083         }
6084         return 0;
6085 }
6086
6087 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6088                                           enum amd_clockgating_state state)
6089 {
6090         uint32_t msg_id, pp_state = 0;
6091         uint32_t pp_support_state = 0;
6092
6093         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6094                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6095                         pp_support_state = PP_STATE_SUPPORT_LS;
6096                         pp_state = PP_STATE_LS;
6097                 }
6098                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6099                         pp_support_state |= PP_STATE_SUPPORT_CG;
6100                         pp_state |= PP_STATE_CG;
6101                 }
6102                 if (state == AMD_CG_STATE_UNGATE)
6103                         pp_state = 0;
6104
6105                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6106                                 PP_BLOCK_GFX_CG,
6107                                 pp_support_state,
6108                                 pp_state);
6109                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6110                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6111         }
6112
6113         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6114                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6115                         pp_support_state = PP_STATE_SUPPORT_LS;
6116                         pp_state = PP_STATE_LS;
6117                 }
6118
6119                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6120                         pp_support_state |= PP_STATE_SUPPORT_CG;
6121                         pp_state |= PP_STATE_CG;
6122                 }
6123
6124                 if (state == AMD_CG_STATE_UNGATE)
6125                         pp_state = 0;
6126
6127                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6128                                 PP_BLOCK_GFX_MG,
6129                                 pp_support_state,
6130                                 pp_state);
6131                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6132                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6133         }
6134
6135         return 0;
6136 }
6137
6138 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6139                                           enum amd_clockgating_state state)
6140 {
6141
6142         uint32_t msg_id, pp_state = 0;
6143         uint32_t pp_support_state = 0;
6144
6145         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6146                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6147                         pp_support_state = PP_STATE_SUPPORT_LS;
6148                         pp_state = PP_STATE_LS;
6149                 }
6150                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6151                         pp_support_state |= PP_STATE_SUPPORT_CG;
6152                         pp_state |= PP_STATE_CG;
6153                 }
6154                 if (state == AMD_CG_STATE_UNGATE)
6155                         pp_state = 0;
6156
6157                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6158                                 PP_BLOCK_GFX_CG,
6159                                 pp_support_state,
6160                                 pp_state);
6161                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6162                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6163         }
6164
6165         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6166                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6167                         pp_support_state = PP_STATE_SUPPORT_LS;
6168                         pp_state = PP_STATE_LS;
6169                 }
6170                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6171                         pp_support_state |= PP_STATE_SUPPORT_CG;
6172                         pp_state |= PP_STATE_CG;
6173                 }
6174                 if (state == AMD_CG_STATE_UNGATE)
6175                         pp_state = 0;
6176
6177                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6178                                 PP_BLOCK_GFX_3D,
6179                                 pp_support_state,
6180                                 pp_state);
6181                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6182                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6183         }
6184
6185         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6186                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6187                         pp_support_state = PP_STATE_SUPPORT_LS;
6188                         pp_state = PP_STATE_LS;
6189                 }
6190
6191                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6192                         pp_support_state |= PP_STATE_SUPPORT_CG;
6193                         pp_state |= PP_STATE_CG;
6194                 }
6195
6196                 if (state == AMD_CG_STATE_UNGATE)
6197                         pp_state = 0;
6198
6199                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6200                                 PP_BLOCK_GFX_MG,
6201                                 pp_support_state,
6202                                 pp_state);
6203                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6204                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6205         }
6206
6207         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6208                 pp_support_state = PP_STATE_SUPPORT_LS;
6209
6210                 if (state == AMD_CG_STATE_UNGATE)
6211                         pp_state = 0;
6212                 else
6213                         pp_state = PP_STATE_LS;
6214
6215                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6216                                 PP_BLOCK_GFX_RLC,
6217                                 pp_support_state,
6218                                 pp_state);
6219                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6220                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6221         }
6222
6223         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6224                 pp_support_state = PP_STATE_SUPPORT_LS;
6225
6226                 if (state == AMD_CG_STATE_UNGATE)
6227                         pp_state = 0;
6228                 else
6229                         pp_state = PP_STATE_LS;
6230                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6231                         PP_BLOCK_GFX_CP,
6232                         pp_support_state,
6233                         pp_state);
6234                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6235                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6236         }
6237
6238         return 0;
6239 }
6240
/**
 * gfx_v8_0_set_clockgating_state - dispatch a clockgating update per ASIC
 * @handle: opaque IP-block handle, actually a struct amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Does nothing under SR-IOV (presumably the host controls clockgating
 * for VFs — confirm).  Fiji/Carrizo/Stoney program gating directly;
 * Tonga and the Polaris family (incl. VegaM) go through SMU messages.
 * Unhandled ASIC types are silently ignored.
 *
 * Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6270
6271 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6272 {
6273         return ring->adev->wb.wb[ring->rptr_offs];
6274 }
6275
6276 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6277 {
6278         struct amdgpu_device *adev = ring->adev;
6279
6280         if (ring->use_doorbell)
6281                 /* XXX check if swapping is necessary on BE */
6282                 return ring->adev->wb.wb[ring->wptr_offs];
6283         else
6284                 return RREG32(mmCP_RB0_WPTR);
6285 }
6286
/*
 * gfx_v8_0_ring_set_wptr_gfx - publish the gfx ring's write pointer
 *
 * Doorbell rings mirror the pointer into the writeback slot and ring
 * the doorbell; otherwise the pointer is written to CP_RB0_WPTR and
 * read back (the readback presumably flushes the posted register
 * write — confirm against the register bus semantics).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6300
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush request on the ring
 *
 * Selects the GPU_HDP_FLUSH_DONE bit belonging to this ring (a
 * per-ME/pipe CP bit for compute/KIQ rings, CP0 for gfx) and emits a
 * WAIT_REG_MEM packet in write-wait-write mode: the CP writes the
 * flush request register, then polls GPU_HDP_FLUSH_DONE until the
 * selected bit matches ref_and_mask.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only ME 1/2 have DONE bits mapped here; bail out */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6333
6334 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6335 {
6336         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6337         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6338                 EVENT_INDEX(4));
6339
6340         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6341         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6342                 EVENT_INDEX(0));
6343 }
6344
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 * @ib: the IB to schedule
 * @vmid: VMID the IB executes under (packed into control bits 24+)
 * @ctx_switch: unused by this implementation
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs a plain INDIRECT_BUFFER.
 * For preemptible IBs under SR-IOV the PRE_ENB bit is set and DE
 * metadata is emitted ahead of the packet.  The 64-bit IB address is
 * split into dword-aligned low bits plus 16 high bits.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* DE metadata only for the DE IB of a preemptible pair */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* presumably a CP byte-swap select on BE — confirm */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6374
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ib: the IB to schedule
 * @vmid: VMID the IB executes under (packed into control bits 24+)
 * @ctx_switch: unused by this implementation
 *
 * Compute IBs always use INDIRECT_BUFFER with the VALID bit set; the
 * 64-bit IB address is split into dword-aligned low bits plus 16 high
 * bits, same as the gfx variant.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* presumably a CP byte-swap select on BE — confirm */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6390
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a gfx fence via EVENT_WRITE_EOP
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence value
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Flushes TCL1 and TC (with writeback) on a CACHE_FLUSH_AND_INV_TS
 * event, then writes @seq to @addr.  DATA_SEL selects a 64- or 32-bit
 * payload and INT_SEL optionally raises an interrupt, per @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6411
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's last fence
 *
 * Emits a WAIT_REG_MEM packet in memory space that polls the ring's
 * fence address until it equals the latest sync_seq.  Gfx rings wait
 * on the PFP engine, all others on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6428
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for @vmid on this ring
 * @vmid: VMID whose translations are invalidated
 * @pd_addr: new page-directory base for the VMID
 *
 * Emits the GMC TLB flush, then a WAIT_REG_MEM read of
 * VM_INVALIDATE_REQUEST with function "always" (ref/mask 0) so the CP
 * waits until the invalidate write has been consumed, and finally a
 * PFP_SYNC_ME on gfx rings so the PFP cannot run ahead with stale
 * reads.  Compute rings have no PFP, hence the conditional.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6454
6455 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6456 {
6457         return ring->adev->wb.wb[ring->wptr_offs];
6458 }
6459
6460 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6461 {
6462         struct amdgpu_device *adev = ring->adev;
6463
6464         /* XXX check if swapping is necessary on BE */
6465         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6466         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6467 }
6468
/*
 * gfx_v8_0_ring_set_pipe_percent - throttle or restore a pipe's SPI share
 * @acquire: true programs the full VALUE mask, false drops the share to 1
 *
 * Computes the SPI_WCL_PIPE_PERCENT_* register index for this ring's
 * ME/pipe and programs its VALUE field.  The first ME only contributes
 * two entries (GFX and HP3D), hence the -2 adjustment for later MEs.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6487
/*
 * gfx_v8_0_pipe_reserve_resources - track per-pipe high-priority reservations
 * @acquire: true reserves this ring's pipe, false releases it
 *
 * Sets or clears this ring's pipe bit in pipe_reserve_bitmap under
 * pipe_reserve_mutex.  When no reservation remains, every gfx and
 * compute ring gets its full SPI pipe percentage back; otherwise each
 * ring is throttled unless its own pipe currently holds a reservation.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6537
/*
 * gfx_v8_0_hqd_set_priority - program a queue's HQD priority registers
 * @acquire: true -> pipe priority 0x2 / queue priority 0xf, false -> 0/0
 *
 * Selects the ring's me/pipe/queue via SRBM (under srbm_mutex), writes
 * CP_HQD_PIPE_PRIORITY and CP_HQD_QUEUE_PRIORITY, then restores the
 * default SRBM selection before releasing the mutex.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6554 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6555                                                enum drm_sched_priority priority)
6556 {
6557         struct amdgpu_device *adev = ring->adev;
6558         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6559
6560         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6561                 return;
6562
6563         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6564         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6565 }
6566
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a compute fence via RELEASE_MEM
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence value
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Flushes TCL1 and TC (with writeback) on a CACHE_FLUSH_AND_INV_TS
 * event and writes @seq to @addr; DATA_SEL selects a 64- or 32-bit
 * payload and INT_SEL optionally raises an interrupt, per @flags.
 * Same contract as the gfx variant, but using the RELEASE_MEM packet.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6587
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @addr: GPU address the 32-bit fence value is written to
 * @seq: fence sequence value (low 32 bits are used)
 * @flags: must not contain AMDGPU_FENCE_FLAG_64BIT
 *
 * KIQ fences are always 32-bit — only 32 bits of writeback space are
 * allocated per seq, hence the BUG_ON.  The seq is written to memory
 * with WRITE_DATA; when AMDGPU_FENCE_FLAG_INT is set, a second
 * WRITE_DATA pokes CPC_INT_STATUS to trigger the interrupt
 * (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6612
/* Emit a SWITCH_BUFFER packet (payload dword is zero). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6618
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the load-control dword: on a context switch the VGT is
 * flushed first and the packet requests (re)loading global config,
 * uconfig, CS SH regs, per-context state and gfx SH regs; CE RAM is
 * additionally loaded when a preamble IB is present.  Under SR-IOV a
 * CE metadata packet is emitted up front.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6651
/*
 * gfx_v8_0_ring_emit_init_cond_exec - open a conditional-execution region
 *
 * Emits a COND_EXEC packet referencing cond_exe_gpu_addr; the
 * following-DW count is emitted as a 0x55aa55aa placeholder and its
 * ring offset returned, so gfx_v8_0_ring_emit_patch_cond_exec() can
 * patch in the real count once the region has been emitted.
 *
 * Returns the ring offset (masked index) of the placeholder dword.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6664
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - close a conditional-execution region
 * @offset: ring offset of the placeholder returned by init_cond_exec
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords
 * emitted since it; the else-branch handles the case where the write
 * pointer wrapped around the ring buffer in between.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6678
/*
 * gfx_v8_0_ring_emit_rreg - emit a register read into the VF writeback slot
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet copying @reg to memory (write-confirmed) at
 * wb.gpu_addr + virt.reg_val_offs * 4, where the SR-IOV register-read
 * path presumably picks the value up — confirm against the KIQ rreg
 * helper.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6694
6695 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6696                                   uint32_t val)
6697 {
6698         uint32_t cmd;
6699
6700         switch (ring->funcs->type) {
6701         case AMDGPU_RING_TYPE_GFX:
6702                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6703                 break;
6704         case AMDGPU_RING_TYPE_KIQ:
6705                 cmd = 1 << 16; /* no inc addr */
6706                 break;
6707         default:
6708                 cmd = WR_CONFIRM;
6709                 break;
6710         }
6711
6712         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6713         amdgpu_ring_write(ring, cmd);
6714         amdgpu_ring_write(ring, reg);
6715         amdgpu_ring_write(ring, 0);
6716         amdgpu_ring_write(ring, val);
6717 }
6718
6719 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6720                                                  enum amdgpu_interrupt_state state)
6721 {
6722         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6723                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6724 }
6725
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle a MEC pipe's EOP irq
 * @me: micro-engine (only ME 1, the first MEC, is handled here)
 * @pipe: pipe index 0-3 within the ME
 * @state: AMDGPU_IRQ_STATE_ENABLE or AMDGPU_IRQ_STATE_DISABLE
 *
 * Sets or clears the TIME_STAMP_INT_ENABLE bit in the per-pipe
 * CP_ME1_PIPEx_INT_CNTL register.  Invalid me/pipe combinations are
 * logged at debug level and ignored.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6776
6777 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6778                                              struct amdgpu_irq_src *source,
6779                                              unsigned type,
6780                                              enum amdgpu_interrupt_state state)
6781 {
6782         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6783                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6784
6785         return 0;
6786 }
6787
6788 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6789                                               struct amdgpu_irq_src *source,
6790                                               unsigned type,
6791                                               enum amdgpu_interrupt_state state)
6792 {
6793         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6794                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6795
6796         return 0;
6797 }
6798
/*
 * gfx_v8_0_set_eop_interrupt_state - route an EOP irq enable request
 * @type: AMDGPU_CP_IRQ_* identifying the gfx ring or a MEC me/pipe pair
 * @state: AMDGPU_IRQ_STATE_ENABLE or AMDGPU_IRQ_STATE_DISABLE
 *
 * Maps the irq type onto the gfx ring or the corresponding MEC me/pipe
 * and programs its EOP (time-stamp) interrupt enable.  Unknown types
 * are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6837
/*
 * gfx_v8_0_set_cp_ecc_int_state - toggle the CP ECC error interrupt
 * @state: AMDGPU_IRQ_STATE_ENABLE or AMDGPU_IRQ_STATE_DISABLE
 *
 * Programs CP_ECC_ERROR_INT_ENABLE uniformly across the CP (global,
 * rings 0-2), the CPC, and every ME1/ME2 pipe interrupt control
 * register.
 *
 * Returns 0 on success, -EINVAL for an unknown @state.
 */
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}
6882
6883 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6884                                      struct amdgpu_irq_src *source,
6885                                      unsigned int type,
6886                                      enum amdgpu_interrupt_state state)
6887 {
6888         int enable_flag;
6889
6890         switch (state) {
6891         case AMDGPU_IRQ_STATE_DISABLE:
6892                 enable_flag = 1;
6893                 break;
6894
6895         case AMDGPU_IRQ_STATE_ENABLE:
6896                 enable_flag = 0;
6897                 break;
6898
6899         default:
6900                 return -EINVAL;
6901         }
6902
6903         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6904                      enable_flag);
6905
6906         return 0;
6907 }
6908
6909 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6910                             struct amdgpu_irq_src *source,
6911                             struct amdgpu_iv_entry *entry)
6912 {
6913         int i;
6914         u8 me_id, pipe_id, queue_id;
6915         struct amdgpu_ring *ring;
6916
6917         DRM_DEBUG("IH: CP EOP\n");
6918         me_id = (entry->ring_id & 0x0c) >> 2;
6919         pipe_id = (entry->ring_id & 0x03) >> 0;
6920         queue_id = (entry->ring_id & 0x70) >> 4;
6921
6922         switch (me_id) {
6923         case 0:
6924                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6925                 break;
6926         case 1:
6927         case 2:
6928                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6929                         ring = &adev->gfx.compute_ring[i];
6930                         /* Per-queue interrupt is supported for MEC starting from VI.
6931                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6932                           */
6933                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6934                                 amdgpu_fence_process(ring);
6935                 }
6936                 break;
6937         }
6938         return 0;
6939 }
6940
/*
 * Privileged-register fault handler: a command stream touched a register
 * it is not allowed to access.  Log the error and kick off a GPU reset
 * via the device's reset work item.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6949
/*
 * Privileged-instruction fault handler: a command stream contained an
 * instruction it is not allowed to execute.  Log the error and schedule
 * a GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6958
/*
 * CP EDC/ECC error interrupt handler.  Only logs the event; no recovery
 * is attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Kernel log messages must be newline-terminated, otherwise the
	 * next printk may be appended to this line. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6966
6967 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6968 {
6969         u32 enc, se_id, sh_id, cu_id;
6970         char type[20];
6971         int sq_edc_source = -1;
6972
6973         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6974         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6975
6976         switch (enc) {
6977                 case 0:
6978                         DRM_INFO("SQ general purpose intr detected:"
6979                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6980                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6981                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6982                                         "wlt %d, thread_trace %d.\n",
6983                                         se_id,
6984                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6985                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6986                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6987                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6988                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6989                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6990                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6991                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6992                                         );
6993                         break;
6994                 case 1:
6995                 case 2:
6996
6997                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6998                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6999
7000                         /*
7001                          * This function can be called either directly from ISR
7002                          * or from BH in which case we can access SQ_EDC_INFO
7003                          * instance
7004                          */
7005                         if (in_task()) {
7006                                 mutex_lock(&adev->grbm_idx_mutex);
7007                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7008
7009                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7010
7011                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7012                                 mutex_unlock(&adev->grbm_idx_mutex);
7013                         }
7014
7015                         if (enc == 1)
7016                                 sprintf(type, "instruction intr");
7017                         else
7018                                 sprintf(type, "EDC/ECC error");
7019
7020                         DRM_INFO(
7021                                 "SQ %s detected: "
7022                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7023                                         "trap %s, sq_ed_info.source %s.\n",
7024                                         type, se_id, sh_id, cu_id,
7025                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7026                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7027                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7028                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7029                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7030                                 );
7031                         break;
7032                 default:
7033                         DRM_ERROR("SQ invalid encoding type\n.");
7034         }
7035 }
7036
7037 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7038 {
7039
7040         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7041         struct sq_work *sq_work = container_of(work, struct sq_work, work);
7042
7043         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7044 }
7045
7046 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7047                            struct amdgpu_irq_src *source,
7048                            struct amdgpu_iv_entry *entry)
7049 {
7050         unsigned ih_data = entry->src_data[0];
7051
7052         /*
7053          * Try to submit work so SQ_EDC_INFO can be accessed from
7054          * BH. If previous work submission hasn't finished yet
7055          * just print whatever info is possible directly from the ISR.
7056          */
7057         if (work_pending(&adev->gfx.sq_work.work)) {
7058                 gfx_v8_0_parse_sq_irq(adev, ih_data);
7059         } else {
7060                 adev->gfx.sq_work.ih_data = ih_data;
7061                 schedule_work(&adev->gfx.sq_work.work);
7062         }
7063
7064         return 0;
7065 }
7066
7067 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7068                                             struct amdgpu_irq_src *src,
7069                                             unsigned int type,
7070                                             enum amdgpu_interrupt_state state)
7071 {
7072         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7073
7074         switch (type) {
7075         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7076                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7077                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7078                 if (ring->me == 1)
7079                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7080                                      ring->pipe,
7081                                      GENERIC2_INT_ENABLE,
7082                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7083                 else
7084                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7085                                      ring->pipe,
7086                                      GENERIC2_INT_ENABLE,
7087                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7088                 break;
7089         default:
7090                 BUG(); /* kiq only support GENERIC2_INT now */
7091                 break;
7092         }
7093         return 0;
7094 }
7095
7096 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7097                             struct amdgpu_irq_src *source,
7098                             struct amdgpu_iv_entry *entry)
7099 {
7100         u8 me_id, pipe_id, queue_id;
7101         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7102
7103         me_id = (entry->ring_id & 0x0c) >> 2;
7104         pipe_id = (entry->ring_id & 0x03) >> 0;
7105         queue_id = (entry->ring_id & 0x70) >> 4;
7106         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7107                    me_id, pipe_id, queue_id);
7108
7109         amdgpu_fence_process(ring);
7110         return 0;
7111 }
7112
/*
 * IP-block level callbacks for the GFX v8 block: init/teardown,
 * suspend/resume, idle checks, soft reset, and clock/power gating.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7133
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	/* worst-case dword budget reserved per frame submission */
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7177
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword budget reserved per frame submission */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7207
/* Ring callbacks for the kernel interface queue (KIQ) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword budget reserved per frame submission */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	/* KIQ additionally supports register read-back through the ring */
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7233
7234 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7235 {
7236         int i;
7237
7238         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7239
7240         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7241                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7242
7243         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7244                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7245 }
7246
/* EOP (end-of-pipe) interrupt source: ring completion fences. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

/* CP EDC/ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ interrupt source (general purpose / instruction / EDC events). */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7276
/*
 * Register the interrupt source tables above with the device and set
 * how many interrupt types each source distinguishes.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* EOP source covers one type per CP ring (gfx + all compute pipes) */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7297
/*
 * Hook up the RLC (run-list controller) callbacks.
 * NOTE(review): iceland_rlc_funcs is defined elsewhere in this file;
 * presumably it is shared by all VI variants despite the Iceland name —
 * confirm against the full file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7302
/*
 * Initialize ASIC GDS (Global Data Share) info: total sizes of the GDS
 * memory, GWS and OA blocks, and how each is partitioned between the
 * gfx ring and command submissions.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info (total GDS memory size comes from hardware) */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* partition sizes differ for 64KB vs. other GDS memory sizes */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7330
7331 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7332                                                  u32 bitmap)
7333 {
7334         u32 data;
7335
7336         if (!bitmap)
7337                 return;
7338
7339         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7340         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7341
7342         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7343 }
7344
7345 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7346 {
7347         u32 data, mask;
7348
7349         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7350                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7351
7352         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7353
7354         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7355 }
7356
/*
 * Walk every shader engine / shader array, read the active-CU bitmap
 * for each, and fill in adev->gfx.cu_info: per-SH bitmaps, the total
 * active CU count, and the always-on (AO) CU mask.
 *
 * Depends on GRBM index selection (gfx_v8_0_select_se_sh), so the whole
 * walk runs under grbm_idx_mutex and restores broadcast selection
 * (0xffffffff) before releasing it.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* parse the amdgpu.disable_cu module parameter (up to 4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM at this SE/SH before touching CU regs */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask is 32-bit: 8 bits per SH, 2 SHs per SE,
			 * so only the first 2 SEs fit */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast selection before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7412
/* Exported IP-block descriptor for GFX v8.0 hardware. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

/* Exported IP-block descriptor for GFX v8.1 hardware (same callbacks). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7430
7431 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7432 {
7433         uint64_t ce_payload_addr;
7434         int cnt_ce;
7435         union {
7436                 struct vi_ce_ib_state regular;
7437                 struct vi_ce_ib_state_chained_ib chained;
7438         } ce_payload = {};
7439
7440         if (ring->adev->virt.chained_ib_support) {
7441                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7442                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7443                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7444         } else {
7445                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7446                         offsetof(struct vi_gfx_meta_data, ce_payload);
7447                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7448         }
7449
7450         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7451         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7452                                 WRITE_DATA_DST_SEL(8) |
7453                                 WR_CONFIRM) |
7454                                 WRITE_DATA_CACHE_POLICY(0));
7455         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7456         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7457         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7458 }
7459
7460 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7461 {
7462         uint64_t de_payload_addr, gds_addr, csa_addr;
7463         int cnt_de;
7464         union {
7465                 struct vi_de_ib_state regular;
7466                 struct vi_de_ib_state_chained_ib chained;
7467         } de_payload = {};
7468
7469         csa_addr = amdgpu_csa_vaddr(ring->adev);
7470         gds_addr = csa_addr + 4096;
7471         if (ring->adev->virt.chained_ib_support) {
7472                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7473                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7474                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7475                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7476         } else {
7477                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7478                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7479                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7480                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7481         }
7482
7483         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7484         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7485                                 WRITE_DATA_DST_SEL(8) |
7486                                 WR_CONFIRM) |
7487                                 WRITE_DATA_CACHE_POLICY(0));
7488         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7489         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7490         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7491 }
This page took 0.498336 seconds and 4 git commands to generate.