/*
 * linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * (snapshot near merge tag 'ib-fbdev-drm-v4.19-deferred-console-takeover')
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_MEC_HPD_SIZE 2048
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
84 enum {
85         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
86         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
87         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
88         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
89         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
90         BPM_REG_FGCG_MAX
91 };
92
93 #define RLC_FormatDirectRegListLength        14
94
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
139
140 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
151
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163
164 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
165 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
171 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
172 {
173         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
174         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
175         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
176         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
177         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
178         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
179         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
180         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
181         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
182         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
183         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
184         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
185         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
186         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
187         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
188         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
189 };
190
191 static const u32 golden_settings_tonga_a11[] =
192 {
193         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
194         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
195         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
196         mmGB_GPU_ID, 0x0000000f, 0x00000000,
197         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
198         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
199         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
200         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
201         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
202         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
203         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
204         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
205         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
206         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
207         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
208         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
209 };
210
211 static const u32 tonga_golden_common_all[] =
212 {
213         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
214         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
215         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
216         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
217         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
218         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
219         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
220         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
221 };
222
223 static const u32 tonga_mgcg_cgcg_init[] =
224 {
225         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
226         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
227         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
228         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
229         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
232         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
234         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
236         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
238         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
243         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
247         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
248         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
249         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
250         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
251         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
252         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
253         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
254         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
255         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
256         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
257         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
258         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
259         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
260         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
261         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
264         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
269         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
274         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
279         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
284         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
289         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
294         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
297         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
298         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
299         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
300 };
301
302 static const u32 golden_settings_vegam_a11[] =
303 {
304         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
305         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
306         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
311         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
312         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314         mmSQ_CONFIG, 0x07f80000, 0x01180000,
315         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
320         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322
323 static const u32 vegam_golden_common_all[] =
324 {
325         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
327         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
328         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
329         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
330         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
331 };
332
333 static const u32 golden_settings_polaris11_a11[] =
334 {
335         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
336         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
337         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
338         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
339         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
340         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
341         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
342         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
343         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
344         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
345         mmSQ_CONFIG, 0x07f80000, 0x01180000,
346         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
347         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
348         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
349         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
350         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
351         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
352 };
353
354 static const u32 polaris11_golden_common_all[] =
355 {
356         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
358         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
359         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
360         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
361         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
362 };
363
364 static const u32 golden_settings_polaris10_a11[] =
365 {
366         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
367         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
368         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
369         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
370         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
371         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
372         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
373         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
374         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
375         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
376         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
377         mmSQ_CONFIG, 0x07f80000, 0x07180000,
378         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
379         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
380         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
381         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
382         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
383 };
384
385 static const u32 polaris10_golden_common_all[] =
386 {
387         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
388         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
389         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
390         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
391         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
392         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
393         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
394         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
395 };
396
397 static const u32 fiji_golden_common_all[] =
398 {
399         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
400         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
401         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
402         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
403         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
404         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
405         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
406         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
407         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
408         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
409 };
410
411 static const u32 golden_settings_fiji_a10[] =
412 {
413         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
414         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
415         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
416         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
417         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
418         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
419         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
420         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
421         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
422         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
423         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
424 };
425
426 static const u32 fiji_mgcg_cgcg_init[] =
427 {
428         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
435         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
460         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
461         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
462         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
463 };
464
465 static const u32 golden_settings_iceland_a11[] =
466 {
467         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
468         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
469         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
470         mmGB_GPU_ID, 0x0000000f, 0x00000000,
471         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
472         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
473         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
474         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
475         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
476         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
477         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
478         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
479         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
480         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
481         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
482         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
483 };
484
485 static const u32 iceland_golden_common_all[] =
486 {
487         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
488         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
489         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
490         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
491         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
492         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
493         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
494         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
495 };
496
497 static const u32 iceland_mgcg_cgcg_init[] =
498 {
499         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
500         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
501         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
502         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
503         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
504         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
505         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
507         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
508         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
510         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
517         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
521         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
522         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
523         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
524         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
525         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
526         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
528         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
529         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
530         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
531         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
532         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
533         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
534         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
535         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
538         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
553         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
558         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
561         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
562         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
563 };
564
565 static const u32 cz_golden_settings_a11[] =
566 {
567         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
568         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
569         mmGB_GPU_ID, 0x0000000f, 0x00000000,
570         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
571         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
572         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
573         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
574         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
575         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
576         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
577         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
578         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
579 };
580
581 static const u32 cz_golden_common_all[] =
582 {
583         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
584         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
585         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
586         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
587         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
588         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
589         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
590         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
591 };
592
593 static const u32 cz_mgcg_cgcg_init[] =
594 {
595         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
596         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
597         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
598         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
599         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
604         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
613         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
617         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
619         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
620         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
621         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
622         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
623         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
625         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
627         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
628         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
629         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
630         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
631         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
667         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
668         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
669         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
670 };
671
/*
 * Stoney per-revision golden register fixups.
 * Triplets of {register offset, AND mask, value}, consumed by
 * amdgpu_device_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
685
/*
 * Stoney common golden settings applied to all revisions.
 * Triplets of {register offset, AND mask, value}; a 0xffffffff mask
 * means the value is written verbatim.  Programmed by
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
697
/*
 * Stoney medium-grain / coarse-grain clockgating init values.
 * Triplets of {register offset, AND mask, value}, programmed by
 * gfx_v8_0_init_golden_registers() before the per-revision settings.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
706
707 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
708 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
709 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
710 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
711 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
712 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
713 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
714 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
715
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies, in order, the clockgating init table, the per-revision fixup
 * table and the common golden table for the detected VI-family ASIC via
 * amdgpu_device_program_register_sequence().  VEGAM and the Polaris
 * parts have no separate mgcg/cgcg init table here.  A few specific
 * Polaris10 boards additionally get two ATOM i2c transactions
 * (board-specific fixup; exact purpose not evident from this file —
 * presumably a regulator/voltage tweak, TODO confirm against the
 * originating commit).
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* i2c fixup for a handful of specific Polaris10 SKUs
		 * (matched by PCI revision + subsystem vendor/device). */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
812
813 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
814 {
815         adev->gfx.scratch.num_reg = 8;
816         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
817         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
818 }
819
820 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
821 {
822         struct amdgpu_device *adev = ring->adev;
823         uint32_t scratch;
824         uint32_t tmp = 0;
825         unsigned i;
826         int r;
827
828         r = amdgpu_gfx_scratch_get(adev, &scratch);
829         if (r) {
830                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
831                 return r;
832         }
833         WREG32(scratch, 0xCAFEDEAD);
834         r = amdgpu_ring_alloc(ring, 3);
835         if (r) {
836                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
837                           ring->idx, r);
838                 amdgpu_gfx_scratch_free(adev, scratch);
839                 return r;
840         }
841         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
842         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
843         amdgpu_ring_write(ring, 0xDEADBEEF);
844         amdgpu_ring_commit(ring);
845
846         for (i = 0; i < adev->usec_timeout; i++) {
847                 tmp = RREG32(scratch);
848                 if (tmp == 0xDEADBEEF)
849                         break;
850                 DRM_UDELAY(1);
851         }
852         if (i < adev->usec_timeout) {
853                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
854                          ring->idx, i);
855         } else {
856                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
857                           ring->idx, scratch, tmp);
858                 r = -EINVAL;
859         }
860         amdgpu_gfx_scratch_free(adev, scratch);
861         return r;
862 }
863
864 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
865 {
866         struct amdgpu_device *adev = ring->adev;
867         struct amdgpu_ib ib;
868         struct dma_fence *f = NULL;
869
870         unsigned int index;
871         uint64_t gpu_addr;
872         uint32_t tmp;
873         long r;
874
875         r = amdgpu_device_wb_get(adev, &index);
876         if (r) {
877                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
878                 return r;
879         }
880
881         gpu_addr = adev->wb.gpu_addr + (index * 4);
882         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
883         memset(&ib, 0, sizeof(ib));
884         r = amdgpu_ib_get(adev, NULL, 16, &ib);
885         if (r) {
886                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
887                 goto err1;
888         }
889         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
890         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
891         ib.ptr[2] = lower_32_bits(gpu_addr);
892         ib.ptr[3] = upper_32_bits(gpu_addr);
893         ib.ptr[4] = 0xDEADBEEF;
894         ib.length_dw = 5;
895
896         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
897         if (r)
898                 goto err2;
899
900         r = dma_fence_wait_timeout(f, false, timeout);
901         if (r == 0) {
902                 DRM_ERROR("amdgpu: IB test timed out.\n");
903                 r = -ETIMEDOUT;
904                 goto err2;
905         } else if (r < 0) {
906                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
907                 goto err2;
908         }
909
910         tmp = adev->wb.wb[index];
911         if (tmp == 0xDEADBEEF) {
912                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
913                 r = 0;
914         } else {
915                 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
916                 r = -EINVAL;
917         }
918
919 err2:
920         amdgpu_ib_free(adev, &ib, NULL);
921         dma_fence_put(f);
922 err1:
923         amdgpu_device_wb_free(adev, index);
924         return r;
925 }
926
927
928 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
929 {
930         release_firmware(adev->gfx.pfp_fw);
931         adev->gfx.pfp_fw = NULL;
932         release_firmware(adev->gfx.me_fw);
933         adev->gfx.me_fw = NULL;
934         release_firmware(adev->gfx.ce_fw);
935         adev->gfx.ce_fw = NULL;
936         release_firmware(adev->gfx.rlc_fw);
937         adev->gfx.rlc_fw = NULL;
938         release_firmware(adev->gfx.mec_fw);
939         adev->gfx.mec_fw = NULL;
940         if ((adev->asic_type != CHIP_STONEY) &&
941             (adev->asic_type != CHIP_TOPAZ))
942                 release_firmware(adev->gfx.mec2_fw);
943         adev->gfx.mec2_fw = NULL;
944
945         kfree(adev->gfx.rlc.register_list_format);
946 }
947
948 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
949 {
950         const char *chip_name;
951         char fw_name[30];
952         int err;
953         struct amdgpu_firmware_info *info = NULL;
954         const struct common_firmware_header *header = NULL;
955         const struct gfx_firmware_header_v1_0 *cp_hdr;
956         const struct rlc_firmware_header_v2_0 *rlc_hdr;
957         unsigned int *tmp = NULL, i;
958
959         DRM_DEBUG("\n");
960
961         switch (adev->asic_type) {
962         case CHIP_TOPAZ:
963                 chip_name = "topaz";
964                 break;
965         case CHIP_TONGA:
966                 chip_name = "tonga";
967                 break;
968         case CHIP_CARRIZO:
969                 chip_name = "carrizo";
970                 break;
971         case CHIP_FIJI:
972                 chip_name = "fiji";
973                 break;
974         case CHIP_STONEY:
975                 chip_name = "stoney";
976                 break;
977         case CHIP_POLARIS10:
978                 chip_name = "polaris10";
979                 break;
980         case CHIP_POLARIS11:
981                 chip_name = "polaris11";
982                 break;
983         case CHIP_POLARIS12:
984                 chip_name = "polaris12";
985                 break;
986         case CHIP_VEGAM:
987                 chip_name = "vegam";
988                 break;
989         default:
990                 BUG();
991         }
992
993         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
994                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
995                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996                 if (err == -ENOENT) {
997                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
998                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
999                 }
1000         } else {
1001                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1002                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1003         }
1004         if (err)
1005                 goto out;
1006         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1007         if (err)
1008                 goto out;
1009         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1010         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1011         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1012
1013         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016                 if (err == -ENOENT) {
1017                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1018                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1019                 }
1020         } else {
1021                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1022                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1023         }
1024         if (err)
1025                 goto out;
1026         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1027         if (err)
1028                 goto out;
1029         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1030         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1031
1032         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1033
1034         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1035                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1036                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037                 if (err == -ENOENT) {
1038                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1039                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1040                 }
1041         } else {
1042                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1043                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1044         }
1045         if (err)
1046                 goto out;
1047         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1048         if (err)
1049                 goto out;
1050         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1051         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1052         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1053
1054         /*
1055          * Support for MCBP/Virtualization in combination with chained IBs is
1056          * formal released on feature version #46
1057          */
1058         if (adev->gfx.ce_feature_version >= 46 &&
1059             adev->gfx.pfp_feature_version >= 46) {
1060                 adev->virt.chained_ib_support = true;
1061                 DRM_INFO("Chained IB support enabled!\n");
1062         } else
1063                 adev->virt.chained_ib_support = false;
1064
1065         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1066         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1067         if (err)
1068                 goto out;
1069         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1070         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1071         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1072         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1073
1074         adev->gfx.rlc.save_and_restore_offset =
1075                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1076         adev->gfx.rlc.clear_state_descriptor_offset =
1077                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1078         adev->gfx.rlc.avail_scratch_ram_locations =
1079                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1080         adev->gfx.rlc.reg_restore_list_size =
1081                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1082         adev->gfx.rlc.reg_list_format_start =
1083                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1084         adev->gfx.rlc.reg_list_format_separate_start =
1085                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1086         adev->gfx.rlc.starting_offsets_start =
1087                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1088         adev->gfx.rlc.reg_list_format_size_bytes =
1089                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1090         adev->gfx.rlc.reg_list_size_bytes =
1091                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1092
1093         adev->gfx.rlc.register_list_format =
1094                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1095                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1096
1097         if (!adev->gfx.rlc.register_list_format) {
1098                 err = -ENOMEM;
1099                 goto out;
1100         }
1101
1102         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1103                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1104         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1105                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1106
1107         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1108
1109         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1110                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1111         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1112                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1113
1114         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1115                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1116                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117                 if (err == -ENOENT) {
1118                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1119                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1120                 }
1121         } else {
1122                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1123                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1124         }
1125         if (err)
1126                 goto out;
1127         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1128         if (err)
1129                 goto out;
1130         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1131         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1132         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1133
1134         if ((adev->asic_type != CHIP_STONEY) &&
1135             (adev->asic_type != CHIP_TOPAZ)) {
1136                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1137                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1138                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                         if (err == -ENOENT) {
1140                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1141                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1142                         }
1143                 } else {
1144                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1145                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1146                 }
1147                 if (!err) {
1148                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1149                         if (err)
1150                                 goto out;
1151                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1152                                 adev->gfx.mec2_fw->data;
1153                         adev->gfx.mec2_fw_version =
1154                                 le32_to_cpu(cp_hdr->header.ucode_version);
1155                         adev->gfx.mec2_feature_version =
1156                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1157                 } else {
1158                         err = 0;
1159                         adev->gfx.mec2_fw = NULL;
1160                 }
1161         }
1162
1163         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1164                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1165                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1166                 info->fw = adev->gfx.pfp_fw;
1167                 header = (const struct common_firmware_header *)info->fw->data;
1168                 adev->firmware.fw_size +=
1169                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1172                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1173                 info->fw = adev->gfx.me_fw;
1174                 header = (const struct common_firmware_header *)info->fw->data;
1175                 adev->firmware.fw_size +=
1176                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1179                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1180                 info->fw = adev->gfx.ce_fw;
1181                 header = (const struct common_firmware_header *)info->fw->data;
1182                 adev->firmware.fw_size +=
1183                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1186                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1187                 info->fw = adev->gfx.rlc_fw;
1188                 header = (const struct common_firmware_header *)info->fw->data;
1189                 adev->firmware.fw_size +=
1190                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1193                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1194                 info->fw = adev->gfx.mec_fw;
1195                 header = (const struct common_firmware_header *)info->fw->data;
1196                 adev->firmware.fw_size +=
1197                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1198
1199                 /* we need account JT in */
1200                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201                 adev->firmware.fw_size +=
1202                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203
1204                 if (amdgpu_sriov_vf(adev)) {
1205                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                         info->fw = adev->gfx.mec_fw;
1208                         adev->firmware.fw_size +=
1209                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1210                 }
1211
1212                 if (adev->gfx.mec2_fw) {
1213                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                         info->fw = adev->gfx.mec2_fw;
1216                         header = (const struct common_firmware_header *)info->fw->data;
1217                         adev->firmware.fw_size +=
1218                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219                 }
1220
1221         }
1222
1223 out:
1224         if (err) {
1225                 dev_err(adev->dev,
1226                         "gfx8: Failed to load firmware \"%s\"\n",
1227                         fw_name);
1228                 release_firmware(adev->gfx.pfp_fw);
1229                 adev->gfx.pfp_fw = NULL;
1230                 release_firmware(adev->gfx.me_fw);
1231                 adev->gfx.me_fw = NULL;
1232                 release_firmware(adev->gfx.ce_fw);
1233                 adev->gfx.ce_fw = NULL;
1234                 release_firmware(adev->gfx.rlc_fw);
1235                 adev->gfx.rlc_fw = NULL;
1236                 release_firmware(adev->gfx.mec_fw);
1237                 adev->gfx.mec_fw = NULL;
1238                 release_firmware(adev->gfx.mec2_fw);
1239                 adev->gfx.mec2_fw = NULL;
1240         }
1241         return err;
1242 }
1243
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state indirect buffer
 *
 * Emits PM4 packets into @buffer (little-endian dwords):
 * PREAMBLE_CNTL begin, CONTEXT_CONTROL, one SET_CONTEXT_REG run per
 * SECT_CONTEXT extent of adev->gfx.rlc.cs_data, the raster config pair
 * from rb_config[0][0], PREAMBLE_CNTL end, and a final CLEAR_STATE.
 * Bails out early (leaving the buffer partially written) if a non
 * SECT_CONTEXT section is encountered.  The required dword count is
 * expected to match gfx_v8_0_get_csb_size() — TODO confirm; no bound
 * is checked here.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent of the clear-state data */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config pair for the first shader engine / array */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1290
1291 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1292 {
1293         const __le32 *fw_data;
1294         volatile u32 *dst_ptr;
1295         int me, i, max_me = 4;
1296         u32 bo_offset = 0;
1297         u32 table_offset, table_size;
1298
1299         if (adev->asic_type == CHIP_CARRIZO)
1300                 max_me = 5;
1301
1302         /* write the cp table buffer */
1303         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1304         for (me = 0; me < max_me; me++) {
1305                 if (me == 0) {
1306                         const struct gfx_firmware_header_v1_0 *hdr =
1307                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1308                         fw_data = (const __le32 *)
1309                                 (adev->gfx.ce_fw->data +
1310                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1311                         table_offset = le32_to_cpu(hdr->jt_offset);
1312                         table_size = le32_to_cpu(hdr->jt_size);
1313                 } else if (me == 1) {
1314                         const struct gfx_firmware_header_v1_0 *hdr =
1315                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1316                         fw_data = (const __le32 *)
1317                                 (adev->gfx.pfp_fw->data +
1318                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1319                         table_offset = le32_to_cpu(hdr->jt_offset);
1320                         table_size = le32_to_cpu(hdr->jt_size);
1321                 } else if (me == 2) {
1322                         const struct gfx_firmware_header_v1_0 *hdr =
1323                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1324                         fw_data = (const __le32 *)
1325                                 (adev->gfx.me_fw->data +
1326                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1327                         table_offset = le32_to_cpu(hdr->jt_offset);
1328                         table_size = le32_to_cpu(hdr->jt_size);
1329                 } else if (me == 3) {
1330                         const struct gfx_firmware_header_v1_0 *hdr =
1331                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1332                         fw_data = (const __le32 *)
1333                                 (adev->gfx.mec_fw->data +
1334                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1335                         table_offset = le32_to_cpu(hdr->jt_offset);
1336                         table_size = le32_to_cpu(hdr->jt_size);
1337                 } else  if (me == 4) {
1338                         const struct gfx_firmware_header_v1_0 *hdr =
1339                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1340                         fw_data = (const __le32 *)
1341                                 (adev->gfx.mec2_fw->data +
1342                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1343                         table_offset = le32_to_cpu(hdr->jt_offset);
1344                         table_size = le32_to_cpu(hdr->jt_size);
1345                 }
1346
1347                 for (i = 0; i < table_size; i ++) {
1348                         dst_ptr[bo_offset + i] =
1349                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1350                 }
1351
1352                 bo_offset += table_size;
1353         }
1354 }
1355
/* Free the RLC clear-state and CP jump-table buffer objects allocated by
 * gfx_v8_0_rlc_init().  The kernel-BO pointers are passed by reference so
 * they are reset by the helper after freeing. */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1361
1362 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1363 {
1364         volatile u32 *dst_ptr;
1365         u32 dws;
1366         const struct cs_section_def *cs_data;
1367         int r;
1368
1369         adev->gfx.rlc.cs_data = vi_cs_data;
1370
1371         cs_data = adev->gfx.rlc.cs_data;
1372
1373         if (cs_data) {
1374                 /* clear state block */
1375                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1376
1377                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1378                                               AMDGPU_GEM_DOMAIN_VRAM,
1379                                               &adev->gfx.rlc.clear_state_obj,
1380                                               &adev->gfx.rlc.clear_state_gpu_addr,
1381                                               (void **)&adev->gfx.rlc.cs_ptr);
1382                 if (r) {
1383                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1384                         gfx_v8_0_rlc_fini(adev);
1385                         return r;
1386                 }
1387
1388                 /* set up the cs buffer */
1389                 dst_ptr = adev->gfx.rlc.cs_ptr;
1390                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1391                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1392                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1393         }
1394
1395         if ((adev->asic_type == CHIP_CARRIZO) ||
1396             (adev->asic_type == CHIP_STONEY)) {
1397                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1398                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1399                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1400                                               &adev->gfx.rlc.cp_table_obj,
1401                                               &adev->gfx.rlc.cp_table_gpu_addr,
1402                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1403                 if (r) {
1404                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1405                         return r;
1406                 }
1407
1408                 cz_init_cp_jump_table(adev);
1409
1410                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1411                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1412         }
1413
1414         return 0;
1415 }
1416
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1421
1422 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1423 {
1424         int r;
1425         u32 *hpd;
1426         size_t mec_hpd_size;
1427
1428         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1429
1430         /* take ownership of the relevant compute queues */
1431         amdgpu_gfx_compute_queue_acquire(adev);
1432
1433         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1434
1435         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1436                                       AMDGPU_GEM_DOMAIN_GTT,
1437                                       &adev->gfx.mec.hpd_eop_obj,
1438                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1439                                       (void **)&hpd);
1440         if (r) {
1441                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1442                 return r;
1443         }
1444
1445         memset(hpd, 0, mec_hpd_size);
1446
1447         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1448         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1449
1450         return 0;
1451 }
1452
/* Raw shader dwords dispatched by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the VGPR file (the "VGPR" pass).  Copied verbatim into the IB
 * at vgpr_offset; do not edit the encodings. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1489
/* Raw shader dwords dispatched (twice, with different thread-management
 * masks) by gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPR file.
 * Copied verbatim into the IB at sgpr_offset; do not edit the encodings. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1514
/* Register/value pairs programmed via SET_SH_REG before dispatching the
 * VGPR init shader.  Consumed two entries at a time (offset, value) by
 * gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1535
/* Register/value pairs for the first SGPR init dispatch; only the
 * STATIC_THREAD_MGMT_SE0 mask (0x0f) differs from sgpr2_init_regs.
 * Consumed two entries at a time by gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1556
/* Register/value pairs for the second SGPR init dispatch; same as
 * sgpr1_init_regs except the STATIC_THREAD_MGMT_SE0 mask targets the other
 * CU group (0xf0).  Consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1577
/* EDC SEC/DED counter registers read back (read clears them) at the end of
 * gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1606
/**
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR EDC state on Carrizo
 * @adev: amdgpu device pointer
 *
 * Builds one indirect buffer containing three compute dispatches — the VGPR
 * init shader and the SGPR init shader run twice with different static
 * thread-management masks — submits it on compute ring 0, waits for
 * completion, then re-enables EDC (DED_MODE/PROP_FED) and reads back all
 * SEC/DED counter registers to clear them.
 *
 * Returns 0 on success or when not applicable (non-Carrizo ASIC, compute
 * ring not ready); a negative error code on IB/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable it while the IB runs */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg pair (SET_SH_REG), 4 for PGM_LO/HI,
	 * 5 for DISPATCH_DIRECT, 2 for the CS partial flush EVENT_WRITE */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	/* shader code must start 256-byte aligned (COMPUTE_PGM_LO is a
	 * byte address >> 8) */
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2: same shader, different STATIC_THREAD_MGMT mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation on */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): clears DIS_EDC then ORs in bit 0 — presumably bit 0
	 * here is a write-enable/valid bit; confirm against register spec */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1769
1770 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1771 {
1772         u32 gb_addr_config;
1773         u32 mc_shared_chmap, mc_arb_ramcfg;
1774         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1775         u32 tmp;
1776         int ret;
1777
1778         switch (adev->asic_type) {
1779         case CHIP_TOPAZ:
1780                 adev->gfx.config.max_shader_engines = 1;
1781                 adev->gfx.config.max_tile_pipes = 2;
1782                 adev->gfx.config.max_cu_per_sh = 6;
1783                 adev->gfx.config.max_sh_per_se = 1;
1784                 adev->gfx.config.max_backends_per_se = 2;
1785                 adev->gfx.config.max_texture_channel_caches = 2;
1786                 adev->gfx.config.max_gprs = 256;
1787                 adev->gfx.config.max_gs_threads = 32;
1788                 adev->gfx.config.max_hw_contexts = 8;
1789
1790                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1795                 break;
1796         case CHIP_FIJI:
1797                 adev->gfx.config.max_shader_engines = 4;
1798                 adev->gfx.config.max_tile_pipes = 16;
1799                 adev->gfx.config.max_cu_per_sh = 16;
1800                 adev->gfx.config.max_sh_per_se = 1;
1801                 adev->gfx.config.max_backends_per_se = 4;
1802                 adev->gfx.config.max_texture_channel_caches = 16;
1803                 adev->gfx.config.max_gprs = 256;
1804                 adev->gfx.config.max_gs_threads = 32;
1805                 adev->gfx.config.max_hw_contexts = 8;
1806
1807                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1808                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1809                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1810                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1811                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1812                 break;
1813         case CHIP_POLARIS11:
1814         case CHIP_POLARIS12:
1815                 ret = amdgpu_atombios_get_gfx_info(adev);
1816                 if (ret)
1817                         return ret;
1818                 adev->gfx.config.max_gprs = 256;
1819                 adev->gfx.config.max_gs_threads = 32;
1820                 adev->gfx.config.max_hw_contexts = 8;
1821
1822                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1823                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1824                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1825                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1826                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1827                 break;
1828         case CHIP_POLARIS10:
1829         case CHIP_VEGAM:
1830                 ret = amdgpu_atombios_get_gfx_info(adev);
1831                 if (ret)
1832                         return ret;
1833                 adev->gfx.config.max_gprs = 256;
1834                 adev->gfx.config.max_gs_threads = 32;
1835                 adev->gfx.config.max_hw_contexts = 8;
1836
1837                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1838                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1839                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1840                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1841                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1842                 break;
1843         case CHIP_TONGA:
1844                 adev->gfx.config.max_shader_engines = 4;
1845                 adev->gfx.config.max_tile_pipes = 8;
1846                 adev->gfx.config.max_cu_per_sh = 8;
1847                 adev->gfx.config.max_sh_per_se = 1;
1848                 adev->gfx.config.max_backends_per_se = 2;
1849                 adev->gfx.config.max_texture_channel_caches = 8;
1850                 adev->gfx.config.max_gprs = 256;
1851                 adev->gfx.config.max_gs_threads = 32;
1852                 adev->gfx.config.max_hw_contexts = 8;
1853
1854                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1858                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1859                 break;
1860         case CHIP_CARRIZO:
1861                 adev->gfx.config.max_shader_engines = 1;
1862                 adev->gfx.config.max_tile_pipes = 2;
1863                 adev->gfx.config.max_sh_per_se = 1;
1864                 adev->gfx.config.max_backends_per_se = 2;
1865                 adev->gfx.config.max_cu_per_sh = 8;
1866                 adev->gfx.config.max_texture_channel_caches = 2;
1867                 adev->gfx.config.max_gprs = 256;
1868                 adev->gfx.config.max_gs_threads = 32;
1869                 adev->gfx.config.max_hw_contexts = 8;
1870
1871                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1875                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1876                 break;
1877         case CHIP_STONEY:
1878                 adev->gfx.config.max_shader_engines = 1;
1879                 adev->gfx.config.max_tile_pipes = 2;
1880                 adev->gfx.config.max_sh_per_se = 1;
1881                 adev->gfx.config.max_backends_per_se = 1;
1882                 adev->gfx.config.max_cu_per_sh = 3;
1883                 adev->gfx.config.max_texture_channel_caches = 2;
1884                 adev->gfx.config.max_gprs = 256;
1885                 adev->gfx.config.max_gs_threads = 16;
1886                 adev->gfx.config.max_hw_contexts = 8;
1887
1888                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1889                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1890                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1891                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1892                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1893                 break;
1894         default:
1895                 adev->gfx.config.max_shader_engines = 2;
1896                 adev->gfx.config.max_tile_pipes = 4;
1897                 adev->gfx.config.max_cu_per_sh = 2;
1898                 adev->gfx.config.max_sh_per_se = 1;
1899                 adev->gfx.config.max_backends_per_se = 2;
1900                 adev->gfx.config.max_texture_channel_caches = 4;
1901                 adev->gfx.config.max_gprs = 256;
1902                 adev->gfx.config.max_gs_threads = 32;
1903                 adev->gfx.config.max_hw_contexts = 8;
1904
1905                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1909                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1910                 break;
1911         }
1912
1913         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1914         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1915         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1916
1917         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1918         adev->gfx.config.mem_max_burst_length_bytes = 256;
1919         if (adev->flags & AMD_IS_APU) {
1920                 /* Get memory bank mapping mode. */
1921                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1922                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1923                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1924
1925                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1926                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1927                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1928
1929                 /* Validate settings in case only one DIMM installed. */
1930                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1931                         dimm00_addr_map = 0;
1932                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1933                         dimm01_addr_map = 0;
1934                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1935                         dimm10_addr_map = 0;
1936                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1937                         dimm11_addr_map = 0;
1938
1939                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1940                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1941                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1942                         adev->gfx.config.mem_row_size_in_kb = 2;
1943                 else
1944                         adev->gfx.config.mem_row_size_in_kb = 1;
1945         } else {
1946                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1947                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1948                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1949                         adev->gfx.config.mem_row_size_in_kb = 4;
1950         }
1951
1952         adev->gfx.config.shader_engine_tile_size = 32;
1953         adev->gfx.config.num_gpus = 1;
1954         adev->gfx.config.multi_gpu_tile_size = 64;
1955
1956         /* fix up row size */
1957         switch (adev->gfx.config.mem_row_size_in_kb) {
1958         case 1:
1959         default:
1960                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1961                 break;
1962         case 2:
1963                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1964                 break;
1965         case 4:
1966                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1967                 break;
1968         }
1969         adev->gfx.config.gb_addr_config = gb_addr_config;
1970
1971         return 0;
1972 }
1973
1974 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1975                                         int mec, int pipe, int queue)
1976 {
1977         int r;
1978         unsigned irq_type;
1979         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1980
1981         ring = &adev->gfx.compute_ring[ring_id];
1982
1983         /* mec0 is me1 */
1984         ring->me = mec + 1;
1985         ring->pipe = pipe;
1986         ring->queue = queue;
1987
1988         ring->ring_obj = NULL;
1989         ring->use_doorbell = true;
1990         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1991         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1992                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1993         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1994
1995         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1996                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1997                 + ring->pipe;
1998
1999         /* type-2 packets are deprecated on MEC, use type-3 instead */
2000         r = amdgpu_ring_init(adev, ring, 1024,
2001                         &adev->gfx.eop_irq, irq_type);
2002         if (r)
2003                 return r;
2004
2005
2006         return 0;
2007 }
2008
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 * @handle: opaque pointer, actually the struct amdgpu_device
 *
 * Registers the gfx interrupt sources, loads the gfx microcode,
 * allocates the RLC/MEC/KIQ buffer objects, initializes the gfx and
 * compute rings, creates the compute MQDs and reserves the GDS/GWS/OA
 * partitions used by gfx.
 *
 * Returns 0 on success or a negative error code from the first failing
 * step.  Note that on failure no unwinding of earlier steps is done
 * here; teardown is handled by gfx_v8_0_sw_fini().
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Number of MECs (compute micro engines) exposed per ASIC. */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/*
	 * Interrupt registration: the numeric arguments (178, 181, 184,
	 * 185, 197, 239) are the legacy IH source ids for each event.
	 */
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not exposed for this mec/pipe/queue */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2174
/*
 * gfx_v8_0_sw_fini - tear down what gfx_v8_0_sw_init() created
 * @handle: opaque pointer, actually the struct amdgpu_device
 *
 * Frees the GDS/GWS/OA reservations, finalizes all gfx and compute
 * rings, releases the MQD/KIQ state, the MEC and RLC buffer objects and
 * the loaded microcode.  Statement order is roughly the reverse of
 * sw_init and should be preserved.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/*
	 * The CP table BO is presumably only allocated on Carrizo/Stoney
	 * (see rlc init path) -- it is freed only for those ASICs here.
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2208
2209 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2210 {
2211         uint32_t *modearray, *mod2array;
2212         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2213         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2214         u32 reg_offset;
2215
2216         modearray = adev->gfx.config.tile_mode_array;
2217         mod2array = adev->gfx.config.macrotile_mode_array;
2218
2219         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220                 modearray[reg_offset] = 0;
2221
2222         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2223                 mod2array[reg_offset] = 0;
2224
2225         switch (adev->asic_type) {
2226         case CHIP_TOPAZ:
2227                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P2) |
2229                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P2) |
2233                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236                                 PIPE_CONFIG(ADDR_SURF_P2) |
2237                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                 PIPE_CONFIG(ADDR_SURF_P2) |
2241                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2242                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244                                 PIPE_CONFIG(ADDR_SURF_P2) |
2245                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2252                                 PIPE_CONFIG(ADDR_SURF_P2) |
2253                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2254                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2255                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2256                                 PIPE_CONFIG(ADDR_SURF_P2));
2257                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P2) |
2259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                  PIPE_CONFIG(ADDR_SURF_P2) |
2263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2266                                  PIPE_CONFIG(ADDR_SURF_P2) |
2267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2269                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2270                                  PIPE_CONFIG(ADDR_SURF_P2) |
2271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274                                  PIPE_CONFIG(ADDR_SURF_P2) |
2275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2278                                  PIPE_CONFIG(ADDR_SURF_P2) |
2279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2281                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2282                                  PIPE_CONFIG(ADDR_SURF_P2) |
2283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2285                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286                                  PIPE_CONFIG(ADDR_SURF_P2) |
2287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2290                                  PIPE_CONFIG(ADDR_SURF_P2) |
2291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329
2330                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2333                                 NUM_BANKS(ADDR_SURF_8_BANK));
2334                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                 NUM_BANKS(ADDR_SURF_8_BANK));
2338                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2339                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341                                 NUM_BANKS(ADDR_SURF_8_BANK));
2342                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2345                                 NUM_BANKS(ADDR_SURF_8_BANK));
2346                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2348                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349                                 NUM_BANKS(ADDR_SURF_8_BANK));
2350                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353                                 NUM_BANKS(ADDR_SURF_8_BANK));
2354                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2357                                 NUM_BANKS(ADDR_SURF_8_BANK));
2358                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2360                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361                                 NUM_BANKS(ADDR_SURF_16_BANK));
2362                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2363                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365                                 NUM_BANKS(ADDR_SURF_16_BANK));
2366                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369                                  NUM_BANKS(ADDR_SURF_16_BANK));
2370                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2371                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373                                  NUM_BANKS(ADDR_SURF_16_BANK));
2374                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377                                  NUM_BANKS(ADDR_SURF_16_BANK));
2378                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2381                                  NUM_BANKS(ADDR_SURF_16_BANK));
2382                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                  NUM_BANKS(ADDR_SURF_8_BANK));
2386
2387                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2389                             reg_offset != 23)
2390                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2391
2392                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2393                         if (reg_offset != 7)
2394                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2395
2396                 break;
2397         case CHIP_FIJI:
2398         case CHIP_VEGAM:
2399                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2429                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2430                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2432                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2433                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2450                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2458                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2462                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2465                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2470                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2474                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2482                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2486                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2490                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2492                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2493                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2494                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2495                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2498                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2500                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2502                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2508                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521
2522                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2524                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525                                 NUM_BANKS(ADDR_SURF_8_BANK));
2526                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                 NUM_BANKS(ADDR_SURF_8_BANK));
2530                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533                                 NUM_BANKS(ADDR_SURF_8_BANK));
2534                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537                                 NUM_BANKS(ADDR_SURF_8_BANK));
2538                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2540                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541                                 NUM_BANKS(ADDR_SURF_8_BANK));
2542                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545                                 NUM_BANKS(ADDR_SURF_8_BANK));
2546                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549                                 NUM_BANKS(ADDR_SURF_8_BANK));
2550                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2552                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553                                 NUM_BANKS(ADDR_SURF_8_BANK));
2554                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557                                 NUM_BANKS(ADDR_SURF_8_BANK));
2558                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2560                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2561                                  NUM_BANKS(ADDR_SURF_8_BANK));
2562                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2564                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565                                  NUM_BANKS(ADDR_SURF_8_BANK));
2566                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2569                                  NUM_BANKS(ADDR_SURF_8_BANK));
2570                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2573                                  NUM_BANKS(ADDR_SURF_8_BANK));
2574                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577                                  NUM_BANKS(ADDR_SURF_4_BANK));
2578
2579                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2580                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2581
2582                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2583                         if (reg_offset != 7)
2584                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2585
2586                 break;
2587         case CHIP_TONGA:
                /*
                 * Tonga tiling tables.  modearray[0..30] are the per-index
                 * GB_TILE_MODE register encodings: array mode, pipe config
                 * (P8_32x32_16x16 — 8 pipes on Tonga, with P4_16x16 used for
                 * the PRT "duplicate" entries), tile split, micro tile mode
                 * and sample split, as visible in the field macros below.
                 */
2588                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2592                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2595                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2600                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2603                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2604                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2608                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2612                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* index 8: linear aligned — no tile split/micro mode fields */
2620                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2621                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2622                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2636                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2638                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2639                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2640                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2641                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2645                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2647                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2651                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2654                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2657                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2659                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2660                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2661                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2662                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2663                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2665                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2666                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2667                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2669                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2670                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2671                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2673                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2674                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2675                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2677                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2678                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2679                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2681                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2682                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2683                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2684                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2687                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2689                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2691                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2697                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2703                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2704                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2705                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2707                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2709                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2710
                /*
                 * mod2array[0..14] are the GB_MACROTILE_MODE register
                 * encodings: bank width/height, macro tile aspect ratio and
                 * number of banks per macrotile mode index.
                 */
2711                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2713                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2714                                 NUM_BANKS(ADDR_SURF_16_BANK));
2715                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                 NUM_BANKS(ADDR_SURF_16_BANK));
2719                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2721                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2722                                 NUM_BANKS(ADDR_SURF_16_BANK));
2723                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                 NUM_BANKS(ADDR_SURF_16_BANK));
2731                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2738                                 NUM_BANKS(ADDR_SURF_16_BANK));
                /*
                 * Index 7 is deliberately never populated and the write loop
                 * below skips reg_offset 7 as well — presumably a reserved
                 * macrotile mode on this ASIC (NOTE(review): reason not
                 * visible here; confirm against the VI register spec).
                 */
2739                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2741                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742                                 NUM_BANKS(ADDR_SURF_16_BANK));
2743                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                 NUM_BANKS(ADDR_SURF_16_BANK));
2747                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2749                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2750                                  NUM_BANKS(ADDR_SURF_16_BANK));
2751                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2753                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754                                  NUM_BANKS(ADDR_SURF_16_BANK));
2755                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2758                                  NUM_BANKS(ADDR_SURF_8_BANK));
2759                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762                                  NUM_BANKS(ADDR_SURF_4_BANK));
2763                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2766                                  NUM_BANKS(ADDR_SURF_4_BANK));
2767
                /* Program the tile mode table into GB_TILE_MODE0..n. */
2768                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2769                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2770
                /* Program the macrotile table, skipping the unused index 7. */
2771                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2772                         if (reg_offset != 7)
2773                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2774
2775                 break;
2776         case CHIP_POLARIS11:
2777         case CHIP_POLARIS12:
2778                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2782                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2786                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2790                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2798                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2802                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2812                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2815                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2821                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2824                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2826                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2828                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2829                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2831                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2837                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2839                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2841                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2844                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2847                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2849                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2851                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2852                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2853                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2855                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2856                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2857                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2859                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2860                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2861                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2863                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2864                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2865                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2867                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2868                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2869                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2872                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2873                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2877                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2881                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2885                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2888                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2896                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900
2901                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2903                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2904                                 NUM_BANKS(ADDR_SURF_16_BANK));
2905
2906                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909                                 NUM_BANKS(ADDR_SURF_16_BANK));
2910
2911                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914                                 NUM_BANKS(ADDR_SURF_16_BANK));
2915
2916                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2918                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919                                 NUM_BANKS(ADDR_SURF_16_BANK));
2920
2921                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2923                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2924                                 NUM_BANKS(ADDR_SURF_16_BANK));
2925
2926                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930
2931                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934                                 NUM_BANKS(ADDR_SURF_16_BANK));
2935
2936                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2937                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2938                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939                                 NUM_BANKS(ADDR_SURF_16_BANK));
2940
2941                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2942                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2943                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2944                                 NUM_BANKS(ADDR_SURF_16_BANK));
2945
2946                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2948                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2949                                 NUM_BANKS(ADDR_SURF_16_BANK));
2950
2951                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2952                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                 NUM_BANKS(ADDR_SURF_16_BANK));
2955
2956                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2959                                 NUM_BANKS(ADDR_SURF_16_BANK));
2960
2961                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2963                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2964                                 NUM_BANKS(ADDR_SURF_8_BANK));
2965
2966                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2968                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2969                                 NUM_BANKS(ADDR_SURF_4_BANK));
2970
2971                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2972                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2973
2974                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2975                         if (reg_offset != 7)
2976                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2977
2978                 break;
2979         case CHIP_POLARIS10:
2980                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2981                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2985                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2988                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2989                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2990                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2992                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2993                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2996                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2997                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3000                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3004                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3005                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3008                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3009                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3010                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3012                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3013                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3014                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3018                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3019                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3022                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3023                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3025                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3026                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3027                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3028                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3029                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3030                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3031                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3032                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3033                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3035                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3036                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3037                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3039                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3040                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3041                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3042                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3043                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3045                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3046                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3047                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3048                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3049                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3050                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3051                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3052                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3053                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3054                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3055                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3057                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3058                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3059                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3060                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3061                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3062                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3063                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3064                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3065                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3066                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3067                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3069                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3070                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3071                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3073                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3074                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3075                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3078                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3079                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3081                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3082                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3083                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3085                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3087                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3089                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3090                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3091                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3093                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3094                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3095                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3097                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3098                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3101                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3102
3103                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3104                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3105                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3106                                 NUM_BANKS(ADDR_SURF_16_BANK));
3107
3108                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3110                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3111                                 NUM_BANKS(ADDR_SURF_16_BANK));
3112
3113                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3115                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116                                 NUM_BANKS(ADDR_SURF_16_BANK));
3117
3118                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122
3123                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3125                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3126                                 NUM_BANKS(ADDR_SURF_16_BANK));
3127
3128                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132
3133                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3136                                 NUM_BANKS(ADDR_SURF_16_BANK));
3137
3138                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3140                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141                                 NUM_BANKS(ADDR_SURF_16_BANK));
3142
3143                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3146                                 NUM_BANKS(ADDR_SURF_16_BANK));
3147
3148                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151                                 NUM_BANKS(ADDR_SURF_16_BANK));
3152
3153                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156                                 NUM_BANKS(ADDR_SURF_16_BANK));
3157
3158                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3161                                 NUM_BANKS(ADDR_SURF_8_BANK));
3162
3163                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3166                                 NUM_BANKS(ADDR_SURF_4_BANK));
3167
3168                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3171                                 NUM_BANKS(ADDR_SURF_4_BANK));
3172
3173                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3174                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3175
3176                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3177                         if (reg_offset != 7)
3178                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3179
3180                 break;
3181         case CHIP_STONEY:
3182                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183                                 PIPE_CONFIG(ADDR_SURF_P2) |
3184                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3186                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3187                                 PIPE_CONFIG(ADDR_SURF_P2) |
3188                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3189                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3190                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3191                                 PIPE_CONFIG(ADDR_SURF_P2) |
3192                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3193                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3194                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195                                 PIPE_CONFIG(ADDR_SURF_P2) |
3196                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3197                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3198                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3199                                 PIPE_CONFIG(ADDR_SURF_P2) |
3200                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3201                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3202                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3203                                 PIPE_CONFIG(ADDR_SURF_P2) |
3204                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3205                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3206                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3207                                 PIPE_CONFIG(ADDR_SURF_P2) |
3208                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3210                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3211                                 PIPE_CONFIG(ADDR_SURF_P2));
3212                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3213                                 PIPE_CONFIG(ADDR_SURF_P2) |
3214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3215                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3217                                  PIPE_CONFIG(ADDR_SURF_P2) |
3218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3220                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3221                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3224                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229                                  PIPE_CONFIG(ADDR_SURF_P2) |
3230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3232                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3233                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3236                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3240                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3241                                  PIPE_CONFIG(ADDR_SURF_P2) |
3242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3244                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3245                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3249                                  PIPE_CONFIG(ADDR_SURF_P2) |
3250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3252                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3253                                  PIPE_CONFIG(ADDR_SURF_P2) |
3254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3256                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3257                                  PIPE_CONFIG(ADDR_SURF_P2) |
3258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3260                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3261                                  PIPE_CONFIG(ADDR_SURF_P2) |
3262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3264                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3265                                  PIPE_CONFIG(ADDR_SURF_P2) |
3266                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3267                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3268                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3269                                  PIPE_CONFIG(ADDR_SURF_P2) |
3270                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3271                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                  PIPE_CONFIG(ADDR_SURF_P2) |
3274                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3275                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3276                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                  PIPE_CONFIG(ADDR_SURF_P2) |
3278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281                                  PIPE_CONFIG(ADDR_SURF_P2) |
3282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3284
3285                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288                                 NUM_BANKS(ADDR_SURF_8_BANK));
3289                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3291                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292                                 NUM_BANKS(ADDR_SURF_8_BANK));
3293                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3294                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3295                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3296                                 NUM_BANKS(ADDR_SURF_8_BANK));
3297                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3300                                 NUM_BANKS(ADDR_SURF_8_BANK));
3301                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3302                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3303                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3304                                 NUM_BANKS(ADDR_SURF_8_BANK));
3305                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3306                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3307                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3308                                 NUM_BANKS(ADDR_SURF_8_BANK));
3309                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3312                                 NUM_BANKS(ADDR_SURF_8_BANK));
3313                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3314                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3315                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316                                 NUM_BANKS(ADDR_SURF_16_BANK));
3317                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3318                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3319                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320                                 NUM_BANKS(ADDR_SURF_16_BANK));
3321                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3322                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3323                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324                                  NUM_BANKS(ADDR_SURF_16_BANK));
3325                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3326                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3327                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3328                                  NUM_BANKS(ADDR_SURF_16_BANK));
3329                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3331                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3332                                  NUM_BANKS(ADDR_SURF_16_BANK));
3333                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336                                  NUM_BANKS(ADDR_SURF_16_BANK));
3337                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3340                                  NUM_BANKS(ADDR_SURF_8_BANK));
3341
3342                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3343                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3344                             reg_offset != 23)
3345                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3346
3347                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3348                         if (reg_offset != 7)
3349                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3350
3351                 break;
3352         default:
3353                 dev_warn(adev->dev,
3354                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3355                          adev->asic_type);
3356
3357         case CHIP_CARRIZO:
3358                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3359                                 PIPE_CONFIG(ADDR_SURF_P2) |
3360                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3361                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3362                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3363                                 PIPE_CONFIG(ADDR_SURF_P2) |
3364                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3365                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3366                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3367                                 PIPE_CONFIG(ADDR_SURF_P2) |
3368                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3369                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3370                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371                                 PIPE_CONFIG(ADDR_SURF_P2) |
3372                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3373                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3374                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3375                                 PIPE_CONFIG(ADDR_SURF_P2) |
3376                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3377                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3378                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3379                                 PIPE_CONFIG(ADDR_SURF_P2) |
3380                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3381                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3382                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3383                                 PIPE_CONFIG(ADDR_SURF_P2) |
3384                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3386                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3387                                 PIPE_CONFIG(ADDR_SURF_P2));
3388                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3389                                 PIPE_CONFIG(ADDR_SURF_P2) |
3390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3391                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3392                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3393                                  PIPE_CONFIG(ADDR_SURF_P2) |
3394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3396                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3397                                  PIPE_CONFIG(ADDR_SURF_P2) |
3398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3400                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3401                                  PIPE_CONFIG(ADDR_SURF_P2) |
3402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3404                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3405                                  PIPE_CONFIG(ADDR_SURF_P2) |
3406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3408                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3409                                  PIPE_CONFIG(ADDR_SURF_P2) |
3410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3412                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3413                                  PIPE_CONFIG(ADDR_SURF_P2) |
3414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3416                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3417                                  PIPE_CONFIG(ADDR_SURF_P2) |
3418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3420                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3421                                  PIPE_CONFIG(ADDR_SURF_P2) |
3422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3424                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3425                                  PIPE_CONFIG(ADDR_SURF_P2) |
3426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3428                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3429                                  PIPE_CONFIG(ADDR_SURF_P2) |
3430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3432                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3433                                  PIPE_CONFIG(ADDR_SURF_P2) |
3434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3436                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3437                                  PIPE_CONFIG(ADDR_SURF_P2) |
3438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3440                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3441                                  PIPE_CONFIG(ADDR_SURF_P2) |
3442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3444                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3445                                  PIPE_CONFIG(ADDR_SURF_P2) |
3446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3449                                  PIPE_CONFIG(ADDR_SURF_P2) |
3450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3452                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3453                                  PIPE_CONFIG(ADDR_SURF_P2) |
3454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3456                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3457                                  PIPE_CONFIG(ADDR_SURF_P2) |
3458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3460
3461                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3464                                 NUM_BANKS(ADDR_SURF_8_BANK));
3465                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3468                                 NUM_BANKS(ADDR_SURF_8_BANK));
3469                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3472                                 NUM_BANKS(ADDR_SURF_8_BANK));
3473                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3476                                 NUM_BANKS(ADDR_SURF_8_BANK));
3477                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3478                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3479                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3480                                 NUM_BANKS(ADDR_SURF_8_BANK));
3481                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3484                                 NUM_BANKS(ADDR_SURF_8_BANK));
3485                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3486                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3487                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3488                                 NUM_BANKS(ADDR_SURF_8_BANK));
3489                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3490                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3491                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3492                                 NUM_BANKS(ADDR_SURF_16_BANK));
3493                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3494                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3495                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3496                                 NUM_BANKS(ADDR_SURF_16_BANK));
3497                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3498                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3499                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3500                                  NUM_BANKS(ADDR_SURF_16_BANK));
3501                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3502                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3503                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3504                                  NUM_BANKS(ADDR_SURF_16_BANK));
3505                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3506                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3507                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3508                                  NUM_BANKS(ADDR_SURF_16_BANK));
3509                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3511                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3512                                  NUM_BANKS(ADDR_SURF_16_BANK));
3513                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3514                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3515                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3516                                  NUM_BANKS(ADDR_SURF_8_BANK));
3517
3518                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3519                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3520                             reg_offset != 23)
3521                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3522
3523                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3524                         if (reg_offset != 7)
3525                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3526
3527                 break;
3528         }
3529 }
3530
3531 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3532                                   u32 se_num, u32 sh_num, u32 instance)
3533 {
3534         u32 data;
3535
3536         if (instance == 0xffffffff)
3537                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3538         else
3539                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3540
3541         if (se_num == 0xffffffff)
3542                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3543         else
3544                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3545
3546         if (sh_num == 0xffffffff)
3547                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3548         else
3549                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3550
3551         WREG32(mmGRBM_GFX_INDEX, data);
3552 }
3553
/* Select a compute queue (me/pipe/queue, VMID 0) via SRBM for subsequent
 * queue-scoped register accesses; thin wrapper around vi_srbm_select().
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3559
3560 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3561 {
3562         u32 data, mask;
3563
3564         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3565                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3566
3567         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3568
3569         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3570                                          adev->gfx.config.max_sh_per_se);
3571
3572         return (~data) & mask;
3573 }
3574
3575 static void
3576 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3577 {
3578         switch (adev->asic_type) {
3579         case CHIP_FIJI:
3580         case CHIP_VEGAM:
3581                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3582                           RB_XSEL2(1) | PKR_MAP(2) |
3583                           PKR_XSEL(1) | PKR_YSEL(1) |
3584                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3585                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3586                            SE_PAIR_YSEL(2);
3587                 break;
3588         case CHIP_TONGA:
3589         case CHIP_POLARIS10:
3590                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3591                           SE_XSEL(1) | SE_YSEL(1);
3592                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3593                            SE_PAIR_YSEL(2);
3594                 break;
3595         case CHIP_TOPAZ:
3596         case CHIP_CARRIZO:
3597                 *rconf |= RB_MAP_PKR0(2);
3598                 *rconf1 |= 0x0;
3599                 break;
3600         case CHIP_POLARIS11:
3601         case CHIP_POLARIS12:
3602                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3603                           SE_XSEL(1) | SE_YSEL(1);
3604                 *rconf1 |= 0x0;
3605                 break;
3606         case CHIP_STONEY:
3607                 *rconf |= 0x0;
3608                 *rconf1 |= 0x0;
3609                 break;
3610         default:
3611                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3612                 break;
3613         }
3614 }
3615
/*
 * Rewrite the raster configuration for parts with harvested (disabled)
 * render backends.  For each shader engine the SE/PKR/RB mapping fields
 * are adjusted so that rasterizer output is steered only at RBs that are
 * actually present, then the per-SE value is written via GRBM_GFX_INDEX
 * selection.  rb_mask is the global bitmap of active RBs, num_rb the
 * total RB count assumed by the mapping math.
 *
 * Caller must hold adev->grbm_idx_mutex (the function reprograms
 * GRBM_GFX_INDEX).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
                                        u32 raster_config, u32 raster_config_1,
                                        unsigned rb_mask, unsigned num_rb)
{
        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
        unsigned rb_per_se = num_rb / num_se;
        unsigned se_mask[4];
        unsigned se;

        /* Slice the global RB bitmap into per-SE masks (up to 4 SEs). */
        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

        /* The mapping math below only supports these topologies. */
        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

        /*
         * If one whole SE pair is empty, repoint SE_PAIR_MAP at the pair
         * that still has active RBs.
         */
        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
                             (!se_mask[2] && !se_mask[3]))) {
                raster_config_1 &= ~SE_PAIR_MAP_MASK;

                if (!se_mask[0] && !se_mask[1]) {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
                } else {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
                }
        }

        for (se = 0; se < num_se; se++) {
                unsigned raster_config_se = raster_config;
                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
                int idx = (se / 2) * 2;    /* index of this SE's pair partner base */

                /* If one SE of the pair is fully harvested, remap SE_MAP. */
                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
                        raster_config_se &= ~SE_MAP_MASK;

                        if (!se_mask[idx]) {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
                        } else {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
                        }
                }

                /* If one packer has no active RBs, remap PKR_MAP. */
                pkr0_mask &= rb_mask;
                pkr1_mask &= rb_mask;
                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
                        raster_config_se &= ~PKR_MAP_MASK;

                        if (!pkr0_mask) {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
                        } else {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
                        }
                }

                if (rb_per_se >= 2) {
                        /* RB_MAP_PKR0: first RB pair of this SE. */
                        unsigned rb0_mask = 1 << (se * rb_per_se);
                        unsigned rb1_mask = rb0_mask << 1;

                        rb0_mask &= rb_mask;
                        rb1_mask &= rb_mask;
                        if (!rb0_mask || !rb1_mask) {
                                raster_config_se &= ~RB_MAP_PKR0_MASK;

                                if (!rb0_mask) {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
                                } else {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
                                }
                        }

                        if (rb_per_se > 2) {
                                /* RB_MAP_PKR1: RB pair of the second packer. */
                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
                                rb1_mask = rb0_mask << 1;
                                rb0_mask &= rb_mask;
                                rb1_mask &= rb_mask;
                                if (!rb0_mask || !rb1_mask) {
                                        raster_config_se &= ~RB_MAP_PKR1_MASK;

                                        if (!rb0_mask) {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
                                        } else {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
                                        }
                                }
                        }
                }

                /* GRBM_GFX_INDEX has a different offset on VI */
                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }

        /* GRBM_GFX_INDEX has a different offset on VI */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3724
/*
 * Discover which render backends are active, program the raster
 * configuration accordingly (harvested path if RBs are missing), and
 * cache the per-SE/SH RB registers for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
        int i, j;
        u32 data;
        u32 raster_config = 0, raster_config_1 = 0;
        u32 active_rbs = 0;
        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                                        adev->gfx.config.max_sh_per_se;
        unsigned num_rb_pipes;

        mutex_lock(&adev->grbm_idx_mutex);
        /* Walk every SE/SH and accumulate the global active-RB bitmap. */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        data = gfx_v8_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
        /* Back to broadcast mode before touching global registers. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);

        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
                             adev->gfx.config.max_shader_engines, 16);

        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

        /*
         * Full complement of RBs (or none reported): write the defaults
         * broadcast; otherwise compute per-SE harvested configs.
         */
        if (!adev->gfx.config.backend_enable_mask ||
                        adev->gfx.config.num_rbs >= num_rb_pipes) {
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        } else {
                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
                                                        adev->gfx.config.backend_enable_mask,
                                                        num_rb_pipes);
        }

        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].raster_config =
                                RREG32(mmPA_SC_RASTER_CONFIG);
                        adev->gfx.config.rb_config[i][j].raster_config_1 =
                                RREG32(mmPA_SC_RASTER_CONFIG_1);
                }
        }
        /* Restore broadcast before dropping the lock. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}
3781
3782 /**
3783  * gfx_v8_0_init_compute_vmid - gart enable
3784  *
3785  * @adev: amdgpu_device pointer
3786  *
3787  * Initialize compute vmid sh_mem registers
3788  *
3789  */
3790 #define DEFAULT_SH_MEM_BASES    (0x6000)
3791 #define FIRST_COMPUTE_VMID      (8)
3792 #define LAST_COMPUTE_VMID       (16)
3793 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3794 {
3795         int i;
3796         uint32_t sh_mem_config;
3797         uint32_t sh_mem_bases;
3798
3799         /*
3800          * Configure apertures:
3801          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3802          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3803          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3804          */
3805         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3806
3807         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3808                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3809                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3810                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3811                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3812                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3813
3814         mutex_lock(&adev->srbm_mutex);
3815         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3816                 vi_srbm_select(adev, 0, 0, 0, i);
3817                 /* CP and shaders */
3818                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3819                 WREG32(mmSH_MEM_APE1_BASE, 1);
3820                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3821                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3822         }
3823         vi_srbm_select(adev, 0, 0, 0, 0);
3824         mutex_unlock(&adev->srbm_mutex);
3825 }
3826
3827 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3828 {
3829         switch (adev->asic_type) {
3830         default:
3831                 adev->gfx.config.double_offchip_lds_buf = 1;
3832                 break;
3833         case CHIP_CARRIZO:
3834         case CHIP_STONEY:
3835                 adev->gfx.config.double_offchip_lds_buf = 0;
3836                 break;
3837         }
3838 }
3839
/*
 * One-time GFX hardware init: address config, tiling tables, RB setup,
 * per-VMID SH_MEM programming and broadcast SC/SPI defaults.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
        u32 tmp, sh_static_mem_cfg;
        int i;

        WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
        /* Mirror the GB address config into the blocks that need it. */
        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

        gfx_v8_0_tiling_mode_table_init(adev);
        gfx_v8_0_setup_rb(adev);
        gfx_v8_0_get_cu_info(adev);
        gfx_v8_0_config_init(adev);

        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
                                   SWIZZLE_ENABLE, 1);
        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                                   ELEMENT_SIZE, 1);
        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                                   INDEX_STRIDE, 3);
        WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

        mutex_lock(&adev->srbm_mutex);
        /* Program SH_MEM_* for every graphics VMID. */
        for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
                vi_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
                if (i == 0) {
                        /* VMID 0 (kernel): uncached default, bases at 0. */
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                        WREG32(mmSH_MEM_BASES, 0);
                } else {
                        /* User VMIDs: non-coherent default mtype. */
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                        /* top 16 bits of the shared aperture base address */
                        tmp = adev->gmc.shared_aperture_start >> 48;
                        WREG32(mmSH_MEM_BASES, tmp);
                }

                /* APE1 base > limit disables the APE1 aperture */
                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
        }
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

        gfx_v8_0_init_compute_vmid(adev);

        mutex_lock(&adev->grbm_idx_mutex);
        /*
         * making sure that the following register writes will be broadcasted
         * to all the shaders
         */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmPA_SC_FIFO_SIZE,
                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
                   (adev->gfx.config.sc_prim_fifo_size_backend <<
                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
                   (adev->gfx.config.sc_hiz_tile_fifo_size <<
                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
                   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

        /* Equalize SPI arbitration priority across the four pipe order TSs. */
        tmp = RREG32(mmSPI_ARB_PRIORITY);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
        WREG32(mmSPI_ARB_PRIORITY, tmp);

        mutex_unlock(&adev->grbm_idx_mutex);

}
3921
/*
 * Poll until the RLC serdes masters report idle: first the per-CU
 * masters for every SE/SH, then the non-CU masters.  Times out after
 * adev->usec_timeout microseconds per unit and logs which SE/SH stalled.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
        u32 i, j, k;
        u32 mask;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                        if (k == adev->usec_timeout) {
                                /* restore broadcast and bail on timeout */
                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
                                                      0xffffffff, 0xffffffff);
                                mutex_unlock(&adev->grbm_idx_mutex);
                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
                                         i, j);
                                return;
                        }
                }
        }
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        /* Now wait for the global (non-CU) serdes masters to go idle. */
        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
        for (k = 0; k < adev->usec_timeout; k++) {
                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
3959
3960 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3961                                                bool enable)
3962 {
3963         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3964
3965         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3966         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3967         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3968         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3969
3970         WREG32(mmCP_INT_CNTL_RING0, tmp);
3971 }
3972
3973 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3974 {
3975         /* csib */
3976         WREG32(mmRLC_CSIB_ADDR_HI,
3977                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3978         WREG32(mmRLC_CSIB_ADDR_LO,
3979                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3980         WREG32(mmRLC_CSIB_LENGTH,
3981                         adev->gfx.rlc.clear_state_size);
3982 }
3983
/*
 * Scan the RLC register-list-format blob starting at @ind_offset and
 * rewrite its indirect register references in place.
 *
 * The blob is a series of entries separated by 0xFFFFFFFF markers.  For
 * each entry the starting offset is recorded in @ind_start_offsets
 * (count reported through @offset_count).  Each distinct value found at
 * entry_start + 2 is collected into @unique_indices (count reported
 * through @indices_count), and that slot in the blob is overwritten with
 * the value's position inside @unique_indices.
 *
 * BUG()s if more than @max_indices or @max_offset slots would be needed
 * — the caller sizes both arrays from the firmware layout.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                /* first word after a marker: record where this entry starts */
                if (new_entry) {
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        BUG_ON(*offset_count >= max_offset);
                }

                /* entry terminator: the next word begins a new entry */
                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        new_entry = true;
                        continue;
                }

                /* skip two words to the indirect-register reference */
                ind_offset += 2;

                /* look for the matching indice */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                /* not seen before: append it to the unique list */
                if (indices >= *indices_count) {
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                /* replace the raw value with its index into unique_indices */
                register_list_format[ind_offset] = indices;
        }
}
4033
/*
 * Upload the RLC save/restore list to the hardware: the direct register
 * restore list goes to SRM ARAM, the (index-rewritten) indirect format
 * list and its per-entry starting offsets go to GPM scratch, and the
 * unique indirect indices are programmed into the SRM index control
 * register pairs.
 *
 * Works on a kmalloc'd copy of the firmware's format list because
 * gfx_v8_0_parse_ind_reg_list() rewrites it in place.
 *
 * Returns 0 on success, -ENOMEM if the temporary copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
        int i, temp, data;
        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
        int indices_count = 0;
        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        int offset_count = 0;

        int list_size;
        unsigned int *register_list_format =
                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
        if (!register_list_format)
                return -ENOMEM;
        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
                        adev->gfx.rlc.reg_list_format_size_bytes);

        /* collect unique indices / entry offsets and rewrite the copy */
        gfx_v8_0_parse_ind_reg_list(register_list_format,
                                RLC_FormatDirectRegListLength,
                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
                                unique_indices,
                                &indices_count,
                                ARRAY_SIZE(unique_indices),
                                indirect_start_offsets,
                                &offset_count,
                                ARRAY_SIZE(indirect_start_offsets));

        /* save and restore list */
        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

        /* indirect list */
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

        /* restore list size in dword pairs */
        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
        list_size = list_size >> 1;
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

        /* starting offsets starts */
        WREG32(mmRLC_GPM_SCRATCH_ADDR,
                adev->gfx.rlc.starting_offsets_start);
        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA,
                                indirect_start_offsets[i]);

        /* unique indices: low 18 bits into ADDR_n, upper bits into DATA_n */
        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
        for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
                if (unique_indices[i] != 0) {
                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
                        WREG32(data + i, unique_indices[i] >> 20);
                }
        }
        kfree(register_list_format);

        return 0;
}
4097
/* Turn on the RLC save/restore machine (SRM) once its lists are loaded. */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4102
/*
 * Program the RLC power-gating timing parameters: idle poll count,
 * power up/down and command propagation delays, SERDES command delay
 * and the GRBM register-save idle threshold.  The delay constants are
 * fixed hardware tuning values for VI parts — do not derive meaning
 * from their numeric values without the register spec.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
        uint32_t data;

        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
        WREG32(mmRLC_PG_DELAY, data);

        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4119
4120 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4121                                                 bool enable)
4122 {
4123         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4124 }
4125
4126 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4127                                                   bool enable)
4128 {
4129         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4130 }
4131
4132 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4133 {
4134         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4135 }
4136
4137 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4138 {
4139         if ((adev->asic_type == CHIP_CARRIZO) ||
4140             (adev->asic_type == CHIP_STONEY)) {
4141                 gfx_v8_0_init_csb(adev);
4142                 gfx_v8_0_init_save_restore_list(adev);
4143                 gfx_v8_0_enable_save_restore_machine(adev);
4144                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4145                 gfx_v8_0_init_power_gating(adev);
4146                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4147         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4148                    (adev->asic_type == CHIP_POLARIS12) ||
4149                    (adev->asic_type == CHIP_VEGAM)) {
4150                 gfx_v8_0_init_csb(adev);
4151                 gfx_v8_0_init_save_restore_list(adev);
4152                 gfx_v8_0_enable_save_restore_machine(adev);
4153                 gfx_v8_0_init_power_gating(adev);
4154         }
4155
4156 }
4157
/*
 * Halt the RLC F32 core, mask the GUI idle interrupts it would raise,
 * then wait for the RLC serdes units to drain before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4165
/*
 * Pulse the GRBM soft-reset line for the RLC: assert, settle 50us,
 * de-assert, settle 50us.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4174
/*
 * Start the RLC F32 core.  On dGPUs the GUI idle interrupts are enabled
 * here; APUs defer that until after the CP is initialized (see comment).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

        /* carrizo do enable cp interrupt after cp inited */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        udelay(50);
}
4185
4186 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4187 {
4188         const struct rlc_firmware_header_v2_0 *hdr;
4189         const __le32 *fw_data;
4190         unsigned i, fw_size;
4191
4192         if (!adev->gfx.rlc_fw)
4193                 return -EINVAL;
4194
4195         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4196         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4197
4198         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4199                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4200         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4201
4202         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4203         for (i = 0; i < fw_size; i++)
4204                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4205         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4206
4207         return 0;
4208 }
4209
/*
 * Full RLC bring-up sequence: stop the RLC, disable coarse/light-sleep
 * clock gating (plus the 3D CGCG/CGLS bits on Polaris/VegaM), disable
 * power gating, soft-reset the RLC, re-init power gating, optionally
 * load the microcode (direct/legacy load path only — PSP/SMU loading
 * handles it elsewhere), and finally start the RLC.
 *
 * Returns 0 on success or the microcode-load error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        int r;
        u32 tmp;

        gfx_v8_0_rlc_stop(adev);

        /* disable CG */
        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
        tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
            adev->asic_type == CHIP_POLARIS10 ||
            adev->asic_type == CHIP_POLARIS12 ||
            adev->asic_type == CHIP_VEGAM) {
                /* also clear the 3D engine CGCG/CGLS enable bits */
                tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
                tmp &= ~0x3;
                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
        }

        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);

        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);


        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                /* legacy rlc firmware loading */
                r = gfx_v8_0_rlc_load_microcode(adev);
                if (r)
                        return r;
        }

        gfx_v8_0_rlc_start(adev);

        return 0;
}
4249
4250 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4251 {
4252         int i;
4253         u32 tmp = RREG32(mmCP_ME_CNTL);
4254
4255         if (enable) {
4256                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4257                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4258                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4259         } else {
4260                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4261                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4262                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4263                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4264                         adev->gfx.gfx_ring[i].ready = false;
4265         }
4266         WREG32(mmCP_ME_CNTL, tmp);
4267         udelay(50);
4268 }
4269
4270 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4271 {
4272         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4273         const struct gfx_firmware_header_v1_0 *ce_hdr;
4274         const struct gfx_firmware_header_v1_0 *me_hdr;
4275         const __le32 *fw_data;
4276         unsigned i, fw_size;
4277
4278         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4279                 return -EINVAL;
4280
4281         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4282                 adev->gfx.pfp_fw->data;
4283         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4284                 adev->gfx.ce_fw->data;
4285         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4286                 adev->gfx.me_fw->data;
4287
4288         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4289         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4290         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4291
4292         gfx_v8_0_cp_gfx_enable(adev, false);
4293
4294         /* PFP */
4295         fw_data = (const __le32 *)
4296                 (adev->gfx.pfp_fw->data +
4297                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4298         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4299         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4300         for (i = 0; i < fw_size; i++)
4301                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4302         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4303
4304         /* CE */
4305         fw_data = (const __le32 *)
4306                 (adev->gfx.ce_fw->data +
4307                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4308         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4309         WREG32(mmCP_CE_UCODE_ADDR, 0);
4310         for (i = 0; i < fw_size; i++)
4311                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4312         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4313
4314         /* ME */
4315         fw_data = (const __le32 *)
4316                 (adev->gfx.me_fw->data +
4317                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4318         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4319         WREG32(mmCP_ME_RAM_WADDR, 0);
4320         for (i = 0; i < fw_size; i++)
4321                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4322         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4323
4324         return 0;
4325 }
4326
4327 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4328 {
4329         u32 count = 0;
4330         const struct cs_section_def *sect = NULL;
4331         const struct cs_extent_def *ext = NULL;
4332
4333         /* begin clear state */
4334         count += 2;
4335         /* context control state */
4336         count += 3;
4337
4338         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4339                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4340                         if (sect->id == SECT_CONTEXT)
4341                                 count += 2 + ext->reg_count;
4342                         else
4343                                 return 0;
4344                 }
4345         }
4346         /* pa_sc_raster_config/pa_sc_raster_config1 */
4347         count += 4;
4348         /* end clear state */
4349         count += 2;
4350         /* clear state */
4351         count += 2;
4352
4353         return count;
4354 }
4355
/*
 * Initialize the CP graphics engine and submit the clear-state packet
 * stream on ring 0.  The packet sequence emitted here must stay in sync
 * with gfx_v8_0_get_csb_size(), which sizes the ring allocation.
 *
 * Returns 0 on success or the ring-allocation error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        /* +4 dwords for the SET_BASE CE-partition packet below */
        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* emit every context-register extent from the VI clear-state table */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* program the raster config for render backend 0 */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
/*
 * Configure the CP graphics ring doorbell: enable it at the ring's
 * doorbell index when the ring uses doorbells, otherwise disable it.
 * On dGPUs the valid doorbell aperture range is also programmed; APUs
 * skip that (and Topaz has no gfx doorbells at all).
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
        u32 tmp;
        /* no gfx doorbells on iceland */
        if (adev->asic_type == CHIP_TOPAZ)
                return;

        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                                DOORBELL_HIT, 0);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 1);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
        }

        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

        /* APUs do not need the doorbell range programming below */
        if (adev->flags & AMD_IS_APU)
                return;

        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                                        DOORBELL_RANGE_LOWER,
                                        AMDGPU_DOORBELL_GFX_RING0);
        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4452
/*
 * Bring up CP graphics ring 0: program the ring-buffer size and control
 * bits, reset the read/write pointers, hook up the rptr/wptr writeback
 * addresses, set the ring base, configure the doorbell, then start the
 * ring and run a ring test.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not-ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address wether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        /* let the pointer writes land before dropping RB_RPTR_WR_ENA */
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        /* ring base is in units of 256 bytes */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r)
                ring->ready = false;

        return r;
}
4510
4511 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4512 {
4513         int i;
4514
4515         if (enable) {
4516                 WREG32(mmCP_MEC_CNTL, 0);
4517         } else {
4518                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4519                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4520                         adev->gfx.compute_ring[i].ready = false;
4521                 adev->gfx.kiq.ring.ready = false;
4522         }
4523         udelay(50);
4524 }
4525
/*
 * Load MEC1 (and, when present, MEC2) compute firmware with the MEC
 * engines halted.  Each image is streamed dword by dword through its
 * UCODE_ADDR/DATA window, and the firmware version is then written to
 * the address register (VI hardware convention).
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
        const struct gfx_firmware_header_v1_0 *mec_hdr;
        const __le32 *fw_data;
        unsigned i, fw_size;

        if (!adev->gfx.mec_fw)
                return -EINVAL;

        /* halt the MEC engines before rewriting their ucode RAM */
        gfx_v8_0_cp_compute_enable(adev, false);

        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

        fw_data = (const __le32 *)
                (adev->gfx.mec_fw->data +
                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

        /* MEC1 */
        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
        for (i = 0; i < fw_size; i++)
                WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
        WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

        /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
        if (adev->gfx.mec2_fw) {
                const struct gfx_firmware_header_v1_0 *mec2_hdr;

                mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
                amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

                fw_data = (const __le32 *)
                        (adev->gfx.mec2_fw->data +
                         le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
                for (i = 0; i < fw_size; i++)
                        WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
                WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
        }

        return 0;
}
4571
4572 /* KIQ functions */
4573 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4574 {
4575         uint32_t tmp;
4576         struct amdgpu_device *adev = ring->adev;
4577
4578         /* tell RLC which is KIQ queue */
4579         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4580         tmp &= 0xffffff00;
4581         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4582         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4583         tmp |= 0x80;
4584         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4585 }
4586
/*
 * Map all kernel compute queues (KCQs) through the KIQ: build the queue
 * mask from the MEC queue bitmap, submit a SET_RESOURCES packet plus one
 * MAP_QUEUES packet per compute ring, then poll a scratch register the
 * packet stream writes at the end to confirm the KIQ processed it.
 *
 * Returns 0 on success, -EINVAL on scratch-poll timeout, or the
 * scratch-alloc / ring-alloc error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint32_t scratch, tmp = 0;
        uint64_t queue_mask = 0;
        int r, i;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << i);
        }

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);

        /* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 4 for the
         * scratch write-back at the end */
        r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        /* set resources */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
        amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
        amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
                uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
                uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

                /* map queues */
                amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
                /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
                amdgpu_ring_write(kiq_ring,
                                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
                amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
                amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
        }
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* wait for the KIQ to consume the stream and stamp the scratch reg */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);

        return r;
}
4672
/*
 * Deactivate the currently selected hardware queue descriptor (HQD):
 * if it is active, issue dequeue request @req and poll up to
 * adev->usec_timeout microseconds for ACTIVE to clear, then reset the
 * dequeue request and the PQ read/write pointers unconditionally.
 *
 * Caller must have selected the target me/pipe/queue and hold the srbm
 * mutex (this function only touches the currently selected HQD).
 *
 * Returns 0 on success, -ETIMEDOUT if the queue stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
        int i, r = 0;

        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                if (i == adev->usec_timeout)
                        r = -ETIMEDOUT;
        }
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);

        return r;
}
4693
/*
 * gfx_v8_0_mqd_init - build the Memory Queue Descriptor (MQD) image for a
 * compute ring.
 *
 * Fills @ring->mqd_ptr with the complete HQD register image (EOP buffer,
 * ring base/size, doorbell control, write-back addresses, MTYPE settings)
 * that gfx_v8_0_mqd_commit() later programs into the hardware.  Several
 * fields are seeded from the currently selected HQD registers via RREG32,
 * so callers select the target me/pipe/queue with vi_srbm_select() first
 * (see gfx_v8_0_kiq_init_queue()/gfx_v8_0_kcq_init_queue()).
 *
 * Side effect: resets ring->wptr to 0.  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        /* MQD header magic identifying a VI compute MQD */
        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        /* statically enable all CUs on all four shader engines */
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* GPU address where CP finds the dynamic CU mask inside the
         * vi_mqd_allocation that wraps this MQD
         */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        /* EOP base is programmed in units of 256 bytes, hence the >> 8 */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD; low bits masked for 4-byte alignment */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        /* this is a kernel-managed (KMD) privileged queue */
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults: snapshot the currently selected HQD's register values */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4836
/*
 * gfx_v8_0_mqd_commit - program a prepared MQD image into the hardware HQD.
 *
 * @adev: amdgpu device
 * @mqd:  MQD previously filled by gfx_v8_0_mqd_init()
 *
 * Writes the MQD's register image to the currently selected queue's HQD
 * registers (callers hold srbm_mutex and have done vi_srbm_select()).
 * The MQD fields from cp_mqd_base_addr_lo onward mirror the register file
 * starting at mmCP_MQD_BASE_ADDR, so registers are programmed by indexing
 * into the MQD with (reg - mmCP_MQD_BASE_ADDR).  mmCP_HQD_ACTIVE is
 * written last, which activates the queue.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD (mmCP_HQD_ACTIVE is the last register written) */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4873
/*
 * gfx_v8_0_kiq_init_queue - initialize (or restore) the KIQ ring's MQD.
 *
 * The KIQ backup slot is the last entry (AMDGPU_MAX_COMPUTE_RINGS) of the
 * mqd_backup array, after the regular compute rings.
 *
 * On GPU reset the previously backed-up MQD is restored and re-committed
 * to the hardware; otherwise a fresh MQD is built with gfx_v8_0_mqd_init()
 * and a backup copy is saved.  All HQD register access is done with the
 * KIQ's me/pipe/queue selected under srbm_mutex.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                /* first-time init: start from a zeroed allocation with all
                 * CUs/RBs enabled in the dynamic masks
                 */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a copy so a later GPU reset can restore this state */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4912
/*
 * gfx_v8_0_kcq_init_queue - initialize (or restore) a compute ring's MQD.
 *
 * Unlike the KIQ path, the MQD is only built here (gfx_v8_0_mqd_init());
 * it is mapped to hardware later via the KIQ (gfx_v8_0_kiq_kcq_enable()),
 * so no gfx_v8_0_mqd_commit() call is needed.  On GPU reset the backup
 * MQD is restored and the ring buffer cleared; on resume-from-suspend the
 * existing MQD is kept and only the ring is cleared.  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
                /* fresh init: zero the allocation, enable all CUs/RBs */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a copy so a later GPU reset can restore this state */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                /* resume from suspend: MQD is still valid, just clear ring */
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4943
4944 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4945 {
4946         if (adev->asic_type > CHIP_TONGA) {
4947                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4948                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4949         }
4950         /* enable doorbells */
4951         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4952 }
4953
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all compute (KCQ) rings.
 *
 * Enables the compute CP, initializes the KIQ MQD (committed directly to
 * hardware) and each KCQ MQD (mapped later through the KIQ via
 * gfx_v8_0_kiq_kcq_enable()), programs the MEC doorbell range, and
 * finally ring-tests KIQ and every KCQ, marking each ring ready/not-ready
 * according to the test result.
 *
 * Returns 0 on success or the first error encountered; a failing KCQ
 * ring test only clears that ring's ready flag and is not propagated.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        ring = &adev->gfx.kiq.ring;

        /* map the KIQ MQD BO so gfx_v8_0_kiq_init_queue() can fill it */
        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
                goto done;

        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
        if (!r) {
                r = gfx_v8_0_kiq_init_queue(ring);
                amdgpu_bo_kunmap(ring->mqd_obj);
                ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
                goto done;

        /* same map/init/unmap dance for every compute ring */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* map the KCQs to hardware queues through the KIQ */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KIQ */
        ring = &adev->gfx.kiq.ring;
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r) {
                ring->ready = false;
                goto done;
        }

        /* Test KCQs */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
5021
5022 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5023 {
5024         int r;
5025
5026         if (!(adev->flags & AMD_IS_APU))
5027                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5028
5029         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5030                         /* legacy firmware loading */
5031                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5032                 if (r)
5033                         return r;
5034
5035                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5036                 if (r)
5037                         return r;
5038         }
5039
5040         r = gfx_v8_0_cp_gfx_resume(adev);
5041         if (r)
5042                 return r;
5043
5044         r = gfx_v8_0_kiq_resume(adev);
5045         if (r)
5046                 return r;
5047
5048         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5049
5050         return 0;
5051 }
5052
/*
 * gfx_v8_0_cp_enable - enable or disable both command processors.
 *
 * Thin convenience wrapper that toggles the gfx CP and the compute CP
 * together; the two are controlled by separate helpers/registers.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
5058
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8.
 *
 * Applies the golden register settings, performs base GPU init, then
 * brings up the RLC followed by the command processors.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* the RLC must be running before the CP can be resumed */
        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5075
5076 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5077 {
5078         struct amdgpu_device *adev = kiq_ring->adev;
5079         uint32_t scratch, tmp = 0;
5080         int r, i;
5081
5082         r = amdgpu_gfx_scratch_get(adev, &scratch);
5083         if (r) {
5084                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5085                 return r;
5086         }
5087         WREG32(scratch, 0xCAFEDEAD);
5088
5089         r = amdgpu_ring_alloc(kiq_ring, 10);
5090         if (r) {
5091                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5092                 amdgpu_gfx_scratch_free(adev, scratch);
5093                 return r;
5094         }
5095
5096         /* unmap queues */
5097         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5098         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5099                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5100                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5101                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5102                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5103         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5104         amdgpu_ring_write(kiq_ring, 0);
5105         amdgpu_ring_write(kiq_ring, 0);
5106         amdgpu_ring_write(kiq_ring, 0);
5107         /* write to scratch for completion */
5108         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5109         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5110         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5111         amdgpu_ring_commit(kiq_ring);
5112
5113         for (i = 0; i < adev->usec_timeout; i++) {
5114                 tmp = RREG32(scratch);
5115                 if (tmp == 0xDEADBEEF)
5116                         break;
5117                 DRM_UDELAY(1);
5118         }
5119         if (i >= adev->usec_timeout) {
5120                 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5121                 r = -EINVAL;
5122         }
5123         amdgpu_gfx_scratch_free(adev, scratch);
5124         return r;
5125 }
5126
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8.
 *
 * Releases the GFX interrupt sources, unmaps all compute queues via the
 * KIQ, and (bare-metal only) stops the command processors and RLC and
 * ungates GFX power gating.  Under SRIOV the host owns the hardware, so
 * only the queue unmap is performed.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);

        /* leave GFX ungated so the next init starts from a known state */
        amdgpu_device_ip_set_powergating_state(adev,
                                               AMD_IP_BLOCK_TYPE_GFX,
                                               AMD_PG_STATE_UNGATE);

        return 0;
}
5156
5157 static int gfx_v8_0_suspend(void *handle)
5158 {
5159         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5160         adev->gfx.in_suspend = true;
5161         return gfx_v8_0_hw_fini(adev);
5162 }
5163
5164 static int gfx_v8_0_resume(void *handle)
5165 {
5166         int r;
5167         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5168
5169         r = gfx_v8_0_hw_init(adev);
5170         adev->gfx.in_suspend = false;
5171         return r;
5172 }
5173
5174 static bool gfx_v8_0_is_idle(void *handle)
5175 {
5176         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5177
5178         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5179                 return false;
5180         else
5181                 return true;
5182 }
5183
5184 static int gfx_v8_0_wait_for_idle(void *handle)
5185 {
5186         unsigned i;
5187         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5188
5189         for (i = 0; i < adev->usec_timeout; i++) {
5190                 if (gfx_v8_0_is_idle(handle))
5191                         return 0;
5192
5193                 udelay(1);
5194         }
5195         return -ETIMEDOUT;
5196 }
5197
/*
 * gfx_v8_0_check_soft_reset - IP-block check_soft_reset callback.
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy/pending
 * bits and accumulates the corresponding GRBM/SRBM soft-reset masks.
 * The masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre_/soft_/post_soft_reset callbacks to consume.
 *
 * Returns true if any reset bits were set (a soft reset is required),
 * false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy pipeline stage requires a CP+GFX reset */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC and CP frontend/compute/gfx engines */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS: pending GRBM requests or busy semaphore block */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5259
5260 static int gfx_v8_0_pre_soft_reset(void *handle)
5261 {
5262         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5263         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5264
5265         if ((!adev->gfx.grbm_soft_reset) &&
5266             (!adev->gfx.srbm_soft_reset))
5267                 return 0;
5268
5269         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5270         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5271
5272         /* stop the rlc */
5273         gfx_v8_0_rlc_stop(adev);
5274
5275         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5276             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5277                 /* Disable GFX parsing/prefetching */
5278                 gfx_v8_0_cp_gfx_enable(adev, false);
5279
5280         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5281             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5282             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5283             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5284                 int i;
5285
5286                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5287                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5288
5289                         mutex_lock(&adev->srbm_mutex);
5290                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5291                         gfx_v8_0_deactivate_hqd(adev, 2);
5292                         vi_srbm_select(adev, 0, 0, 0, 0);
5293                         mutex_unlock(&adev->srbm_mutex);
5294                 }
5295                 /* Disable MEC parsing/prefetching */
5296                 gfx_v8_0_cp_compute_enable(adev, false);
5297         }
5298
5299        return 0;
5300 }
5301
/*
 * gfx_v8_0_soft_reset - IP-block soft_reset callback.
 *
 * Pulses the GRBM/SRBM soft-reset bits recorded by
 * gfx_v8_0_check_soft_reset().  The GFX memory controller interface is
 * stalled and cleared (GMCON_DEBUG) around the reset pulses, each pulse
 * is held for ~50us with a read-back to flush the write, and the
 * function waits another 50us at the end for things to settle.
 * No-op and returns 0 if no reset masks are recorded.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX memory traffic before pulsing the resets */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert, hold, then deassert the GRBM reset bits;
                 * read-backs flush the writes to the hardware
                 */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same pulse sequence for the SRBM reset bits */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* un-stall GFX memory traffic */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5363
/*
 * gfx_v8_0_post_soft_reset - IP-block post_soft_reset callback.
 *
 * Restarts the engines that gfx_v8_0_pre_soft_reset() quiesced, driven
 * by the same recorded reset masks: resumes the gfx CP for CP/GFX
 * resets, and for CP/compute resets deactivates any leftover compute
 * HQD state before resuming the KIQ/compute rings.  Finally restarts
 * the RLC.  No-op and returns 0 if no reset masks are recorded.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
        }
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5401
5402 /**
5403  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5404  *
5405  * @adev: amdgpu_device pointer
5406  *
5407  * Fetches a GPU clock counter snapshot.
5408  * Returns the 64 bit clock counter snapshot.
5409  */
5410 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5411 {
5412         uint64_t clock;
5413
5414         mutex_lock(&adev->gfx.gpu_clock_mutex);
5415         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5416         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5417                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5418         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5419         return clock;
5420 }
5421
/*
 * gfx_v8_0_ring_emit_gds_switch - emit packets programming a VMID's GDS
 * partition.
 *
 * Converts the byte-based base/size arguments to hardware units via the
 * AMDGPU_GDS/GWS/OA shifts, then emits four WRITE_DATA packets that set
 * the per-VMID GDS memory base and size, the GWS base/size pair, and the
 * OA allocation mask.  Each packet writes one register taken from the
 * amdgpu_gds_reg_offset table for @vmid.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* convert byte quantities into the units the registers expect */
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size and base packed into a single register value */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: contiguous bitmask of oa_size bits starting at oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5469
5470 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5471 {
5472         WREG32(mmSQ_IND_INDEX,
5473                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5474                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5475                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5476                 (SQ_IND_INDEX__FORCE_READ_MASK));
5477         return RREG32(mmSQ_IND_DATA);
5478 }
5479
5480 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5481                            uint32_t wave, uint32_t thread,
5482                            uint32_t regno, uint32_t num, uint32_t *out)
5483 {
5484         WREG32(mmSQ_IND_INDEX,
5485                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5486                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5487                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5488                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5489                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5490                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5491         while (num--)
5492                 *(out++) = RREG32(mmSQ_IND_DATA);
5493 }
5494
5495 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5496 {
5497         /* type 0 wave data */
5498         dst[(*no_fields)++] = 0;
5499         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5500         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5501         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5502         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5503         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5504         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5505         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5506         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5507         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5508         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5509         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5510         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5511         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5512         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5513         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5514         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5515         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5516         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5517 }
5518
5519 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5520                                      uint32_t wave, uint32_t start,
5521                                      uint32_t size, uint32_t *dst)
5522 {
5523         wave_read_regs(
5524                 adev, simd, wave, 0,
5525                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5526 }
5527
5528
/* GFX IP callback table: clock counter query, SE/SH and ME/pipe/queue
 * selection, and the wave/SGPR readers used by the debugfs interface. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5536
5537 static int gfx_v8_0_early_init(void *handle)
5538 {
5539         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5540
5541         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5542         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5543         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5544         gfx_v8_0_set_ring_funcs(adev);
5545         gfx_v8_0_set_irq_funcs(adev);
5546         gfx_v8_0_set_gds_init(adev);
5547         gfx_v8_0_set_rlc_funcs(adev);
5548
5549         return 0;
5550 }
5551
/* IP-block late init: enable the GFX interrupt sources, run the EDC GPR
 * workarounds (they submit IBs, hence late init), then gate GFX power.
 * Returns 0 on success or the first failing amdgpu_irq_get()/workaround
 * error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* privileged register / instruction fault interrupts */
	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* CP ECC error reporting */
	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	/* SQ interrupt source */
	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	/* NOTE(review): interrupts enabled above are not released on the
	 * later error paths here; presumably balanced in hw_fini — confirm. */
	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_GATE);

	return 0;
}
5590
5591 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5592                                                        bool enable)
5593 {
5594         if ((adev->asic_type == CHIP_POLARIS11) ||
5595             (adev->asic_type == CHIP_POLARIS12) ||
5596             (adev->asic_type == CHIP_VEGAM))
5597                 /* Send msg to SMU via Powerplay */
5598                 amdgpu_device_ip_set_powergating_state(adev,
5599                                                        AMD_IP_BLOCK_TYPE_SMC,
5600                                                        enable ?
5601                                                        AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5602
5603         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5604 }
5605
/* Toggle dynamic per-CU medium-grain GFX powergating in the RLC. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5611
/* Toggle "quick" medium-grain GFX powergating (Polaris-family RLC bit). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5617
/* Toggle coarse-grain GFX powergating in the RLC (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5623
/* Toggle GFX pipeline powergating in the RLC (Carrizo/Stoney).  When
 * disabling, a dummy register read forces the GFX block awake so the
 * change takes effect. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5633
5634 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5635                                           bool enable)
5636 {
5637         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5638                 cz_enable_gfx_cg_power_gating(adev, true);
5639                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5640                         cz_enable_gfx_pipeline_power_gating(adev, true);
5641         } else {
5642                 cz_enable_gfx_cg_power_gating(adev, false);
5643                 cz_enable_gfx_pipeline_power_gating(adev, false);
5644         }
5645 }
5646
5647 static int gfx_v8_0_set_powergating_state(void *handle,
5648                                           enum amd_powergating_state state)
5649 {
5650         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5651         bool enable = (state == AMD_PG_STATE_GATE);
5652
5653         if (amdgpu_sriov_vf(adev))
5654                 return 0;
5655
5656         switch (adev->asic_type) {
5657         case CHIP_CARRIZO:
5658         case CHIP_STONEY:
5659
5660                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5661                         cz_enable_sck_slow_down_on_power_up(adev, true);
5662                         cz_enable_sck_slow_down_on_power_down(adev, true);
5663                 } else {
5664                         cz_enable_sck_slow_down_on_power_up(adev, false);
5665                         cz_enable_sck_slow_down_on_power_down(adev, false);
5666                 }
5667                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5668                         cz_enable_cp_power_gating(adev, true);
5669                 else
5670                         cz_enable_cp_power_gating(adev, false);
5671
5672                 cz_update_gfx_cg_power_gating(adev, enable);
5673
5674                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5675                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5676                 else
5677                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5678
5679                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5680                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5681                 else
5682                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5683                 break;
5684         case CHIP_POLARIS11:
5685         case CHIP_POLARIS12:
5686         case CHIP_VEGAM:
5687                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5688                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5689                 else
5690                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5691
5692                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5693                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5694                 else
5695                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5696
5697                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5698                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5699                 else
5700                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5701                 break;
5702         default:
5703                 break;
5704         }
5705
5706         return 0;
5707 }
5708
5709 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5710 {
5711         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5712         int data;
5713
5714         if (amdgpu_sriov_vf(adev))
5715                 *flags = 0;
5716
5717         /* AMD_CG_SUPPORT_GFX_MGCG */
5718         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5719         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5720                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5721
5722         /* AMD_CG_SUPPORT_GFX_CGLG */
5723         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5724         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5725                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5726
5727         /* AMD_CG_SUPPORT_GFX_CGLS */
5728         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5729                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5730
5731         /* AMD_CG_SUPPORT_GFX_CGTS */
5732         data = RREG32(mmCGTS_SM_CTRL_REG);
5733         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5734                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5735
5736         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5737         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5738                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5739
5740         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5741         data = RREG32(mmRLC_MEM_SLP_CNTL);
5742         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5743                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5744
5745         /* AMD_CG_SUPPORT_GFX_CP_LS */
5746         data = RREG32(mmCP_MEM_SLP_CNTL);
5747         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5748                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5749 }
5750
5751 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5752                                      uint32_t reg_addr, uint32_t cmd)
5753 {
5754         uint32_t data;
5755
5756         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5757
5758         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5759         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5760
5761         data = RREG32(mmRLC_SERDES_WR_CTRL);
5762         if (adev->asic_type == CHIP_STONEY)
5763                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5764                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5765                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5766                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5767                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5768                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5769                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5770                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5771                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5772         else
5773                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5774                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5775                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5776                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5777                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5778                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5779                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5780                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5781                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5782                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5783                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5784         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5785                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5786                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5787                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5788
5789         WREG32(mmRLC_SERDES_WR_CTRL, data);
5790 }
5791
/* RLC safe-mode message IDs and RLC_GPR_REG2 REQ/MESSAGE field layout
 * (defined locally; not in the generated register headers).
 * NOTE(review): the iceland_*_rlc_safe_mode helpers below program
 * mmRLC_SAFE_MODE directly and do not reference these — confirm whether
 * they are still needed. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5798
/* Put the RLC into safe mode before clock/power-gating changes.
 *
 * No-op when the RLC F32 core is not running, or when neither CGCG nor
 * MGCG is supported.  Otherwise posts CMD with MESSAGE=1 through
 * mmRLC_SAFE_MODE, polls mmRLC_GPM_STAT until both GFX clock and power
 * status bits are set, then polls for the CMD bit to self-clear (each
 * bounded by adev->usec_timeout), and records in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still carries the mmRLC_CNTL contents
		 * when OR'd into the mmRLC_SAFE_MODE write — looks deliberate
		 * upstream, but worth confirming. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to report on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to consume the command (CMD self-clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5832
/* Leave RLC safe mode (counterpart of iceland_enter_rlc_safe_mode).
 *
 * No-op when the RLC F32 core is not running.  Posts CMD with a cleared
 * MESSAGE field only if we previously entered safe mode, then always
 * polls for the CMD bit to self-clear (bounded by adev->usec_timeout).
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge (CMD self-clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5857
/* RLC safe-mode enter/exit callbacks used around clockgating updates. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5862
/* Enable/disable medium-grain clockgating (MGCG) plus the related
 * memory light-sleep (MGLS) and CGTS tree-shade features, following the
 * numbered hardware sequence below.  The whole sequence runs inside RLC
 * safe mode.  On APUs the GRBM override bit is left untouched when
 * enabling.  Register writes are skipped when the value is unchanged.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override cleared only when both MGLS and CGTS_LS
			 * are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5966
/* Enable/disable coarse-grain clockgating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS), following the numbered hardware
 * sequence below inside RLC safe mode.  GUI-idle interrupts are
 * re-enabled on both paths (needed for PG on the disable path).
 * Register writes are skipped when the value is unchanged.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Sequence MGCG and CGCG updates in the required order: medium-grain
 * first when gating, coarse-grain first when ungating.  Always 0. */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS must come up after MGCG + MGLS + TS(CG/LS) */
		gfx_v8_0_update_medium_grain_clock_gating(adev, true);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, true);
	} else {
		/* CGCG/CGLS must come down before MGCG + MGLS + TS(CG/LS) */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, false);
		gfx_v8_0_update_medium_grain_clock_gating(adev, false);
	}
	return 0;
}
6077
6078 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6079                                           enum amd_clockgating_state state)
6080 {
6081         uint32_t msg_id, pp_state = 0;
6082         uint32_t pp_support_state = 0;
6083
6084         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6085                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6086                         pp_support_state = PP_STATE_SUPPORT_LS;
6087                         pp_state = PP_STATE_LS;
6088                 }
6089                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6090                         pp_support_state |= PP_STATE_SUPPORT_CG;
6091                         pp_state |= PP_STATE_CG;
6092                 }
6093                 if (state == AMD_CG_STATE_UNGATE)
6094                         pp_state = 0;
6095
6096                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6097                                 PP_BLOCK_GFX_CG,
6098                                 pp_support_state,
6099                                 pp_state);
6100                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6101                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6102         }
6103
6104         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6105                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6106                         pp_support_state = PP_STATE_SUPPORT_LS;
6107                         pp_state = PP_STATE_LS;
6108                 }
6109
6110                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6111                         pp_support_state |= PP_STATE_SUPPORT_CG;
6112                         pp_state |= PP_STATE_CG;
6113                 }
6114
6115                 if (state == AMD_CG_STATE_UNGATE)
6116                         pp_state = 0;
6117
6118                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6119                                 PP_BLOCK_GFX_MG,
6120                                 pp_support_state,
6121                                 pp_state);
6122                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6123                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6124         }
6125
6126         return 0;
6127 }
6128
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	/*
	 * Request GFX clockgating for Polaris-family parts by sending one
	 * PP_CG_MSG_ID message to the SMU per GFX sub-block (coarse CG, 3D,
	 * medium-grain, RLC, CP).  For each sub-block, pp_support_state
	 * advertises which of CG/LS the hardware supports (from
	 * adev->cg_flags) while pp_state carries the requested gating; on
	 * UNGATE the state is cleared but the support mask is kept so the
	 * SMU knows which features to turn off.  Always returns 0.
	 */
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	/* coarse-grain clockgating (CGCG) / light sleep (CGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* 3D block clockgating / light sleep */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* medium-grain clockgating (MGCG) / light sleep (MGLS) */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* RLC supports light sleep only */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* CP supports light sleep only */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
6231
6232 static int gfx_v8_0_set_clockgating_state(void *handle,
6233                                           enum amd_clockgating_state state)
6234 {
6235         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6236
6237         if (amdgpu_sriov_vf(adev))
6238                 return 0;
6239
6240         switch (adev->asic_type) {
6241         case CHIP_FIJI:
6242         case CHIP_CARRIZO:
6243         case CHIP_STONEY:
6244                 gfx_v8_0_update_gfx_clock_gating(adev,
6245                                                  state == AMD_CG_STATE_GATE);
6246                 break;
6247         case CHIP_TONGA:
6248                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6249                 break;
6250         case CHIP_POLARIS10:
6251         case CHIP_POLARIS11:
6252         case CHIP_POLARIS12:
6253         case CHIP_VEGAM:
6254                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6255                 break;
6256         default:
6257                 break;
6258         }
6259         return 0;
6260 }
6261
6262 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6263 {
6264         return ring->adev->wb.wb[ring->rptr_offs];
6265 }
6266
6267 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6268 {
6269         struct amdgpu_device *adev = ring->adev;
6270
6271         if (ring->use_doorbell)
6272                 /* XXX check if swapping is necessary on BE */
6273                 return ring->adev->wb.wb[ring->wptr_offs];
6274         else
6275                 return RREG32(mmCP_RB0_WPTR);
6276 }
6277
6278 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6279 {
6280         struct amdgpu_device *adev = ring->adev;
6281
6282         if (ring->use_doorbell) {
6283                 /* XXX check if swapping is necessary on BE */
6284                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6285                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6286         } else {
6287                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6288                 (void)RREG32(mmCP_RB0_WPTR);
6289         }
6290 }
6291
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	/*
	 * Emit a WAIT_REG_MEM packet that requests an HDP flush via
	 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the bit for
	 * this CP client is set.  The DONE bit is selected from the ring's
	 * ME/pipe; compute/KIQ rings poll with the ME engine, gfx with PFP.
	 */
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only ME 1 and 2 carry compute queues */
			return;
		}
		reg_mem_engine = 0; /* me */
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask); /* reference value */
	amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6324
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	/* drain pending vertex-shader work before flushing the VGT */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6335
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	/*
	 * Emit an indirect buffer on the gfx ring.  CE IBs use the
	 * INDIRECT_BUFFER_CONST packet, DE IBs the plain INDIRECT_BUFFER.
	 * The control dword packs the IB length and the VMID (bits 24+).
	 */
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	/* under SR-IOV, mark preemptible IBs and emit DE metadata first */
	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap setting for BE hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6365
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	/*
	 * Emit an indirect buffer on a compute ring: IB length plus VMID
	 * (bits 24+) in the control dword, VALID bit set.
	 */
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* swap setting for BE hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6381
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	/*
	 * Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that
	 * flushes/invalidates caches and then writes the 32- or 64-bit
	 * sequence number to @addr, optionally raising an interrupt.
	 */
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned dest */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6402
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	/*
	 * Stall the ring until the most recent fence of this ring has
	 * signaled, by polling the fence writeback address for the current
	 * sync_seq value.  Gfx rings wait on the PFP so following packet
	 * fetches stall too; compute rings wait on the ME.
	 */
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6419
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	/*
	 * Emit a TLB flush for @vmid with the new page-directory address,
	 * then wait for VM_INVALIDATE_REQUEST to read back as 0 before
	 * letting further packets run.
	 */
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6445
6446 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6447 {
6448         return ring->adev->wb.wb[ring->wptr_offs];
6449 }
6450
6451 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6452 {
6453         struct amdgpu_device *adev = ring->adev;
6454
6455         /* XXX check if swapping is necessary on BE */
6456         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6457         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6458 }
6459
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	/*
	 * Throttle or restore a pipe's wave-launch budget via its
	 * SPI_WCL_PIPE_PERCENT register: full VALUE mask when acquiring,
	 * minimal (0x1) when the pipe loses its reservation.
	 */
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	/* SPI_WCL_PIPE_PERCENT_* registers are laid out consecutively */
	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
6478
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	/*
	 * Track pipe reservations for high-priority work.  @ring's pipe
	 * bit is set/cleared in pipe_reserve_bitmap; then either all pipes
	 * get their full budget back (no reservations left) or every pipe
	 * without a reservation is throttled.  Serialized by
	 * pipe_reserve_mutex.
	 */
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6528
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	/*
	 * Raise or reset the HQD pipe/queue priority registers for @ring.
	 * SRBM must be pointed at the ring's me/pipe/queue before touching
	 * the CP_HQD_* registers, and restored to 0 afterwards; the
	 * srbm_mutex serializes that banked access.
	 */
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
6545 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6546                                                enum drm_sched_priority priority)
6547 {
6548         struct amdgpu_device *adev = ring->adev;
6549         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6550
6551         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6552                 return;
6553
6554         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6555         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6556 }
6557
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	/*
	 * Emit a fence on a compute ring.  Compute uses the RELEASE_MEM
	 * packet (not EVENT_WRITE_EOP) to flush caches and write the
	 * sequence number, optionally raising an interrupt.
	 */
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned dest */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6578
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/*
	 * Emit a fence on the KIQ ring: a plain WRITE_DATA of the 32-bit
	 * sequence number to @addr, plus an optional write to
	 * CPC_INT_STATUS to trigger the interrupt.
	 */
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6603
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	/* emit a SWITCH_BUFFER packet (one payload dword of 0) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6609
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	/*
	 * Emit a CONTEXT_CONTROL packet whose dw2 bitfield selects which
	 * state groups the CP (re)loads; on a context switch a VGT flush
	 * is emitted first and the full set of load bits is requested.
	 * Under SR-IOV, CE metadata is emitted before the packet.
	 */
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6642
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	/*
	 * Emit a COND_EXEC packet whose skip-count dword is a placeholder
	 * (0x55aa55aa) and return that dword's ring offset so
	 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count
	 * once the conditional section has been emitted.
	 */
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6655
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	/*
	 * Patch the COND_EXEC placeholder at @offset (as returned by
	 * gfx_v8_0_ring_emit_init_cond_exec()) with the number of dwords
	 * emitted since, accounting for a possible ring-buffer wrap.
	 */
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since init_cond_exec */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6669
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	/*
	 * Emit a COPY_DATA packet that copies register @reg into the
	 * writeback slot at virt.reg_val_offs, where the caller can read
	 * the value back once the packet has executed (used for register
	 * reads via the KIQ under SR-IOV).
	 */
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6685
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	/*
	 * Emit a WRITE_DATA packet that writes @val to register @reg.  The
	 * control word differs per ring type: gfx writes via the PFP with
	 * write confirmation, KIQ only sets the no-increment bit, and
	 * compute uses write confirmation alone.
	 */
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6709
6710 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6711                                                  enum amdgpu_interrupt_state state)
6712 {
6713         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6714                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6715 }
6716
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * Enable/disable the EOP timestamp interrupt for one compute pipe
	 * by read-modify-writing the matching CP_ME1_PIPEn_INT_CNTL.
	 */
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6767
6768 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6769                                              struct amdgpu_irq_src *source,
6770                                              unsigned type,
6771                                              enum amdgpu_interrupt_state state)
6772 {
6773         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6774                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6775
6776         return 0;
6777 }
6778
6779 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6780                                               struct amdgpu_irq_src *source,
6781                                               unsigned type,
6782                                               enum amdgpu_interrupt_state state)
6783 {
6784         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6785                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6786
6787         return 0;
6788 }
6789
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	/*
	 * IRQ framework callback: route an EOP interrupt enable/disable
	 * request to the gfx ring handler or to the matching compute
	 * (ME, pipe) handler.  Always returns 0; unknown types are ignored.
	 */
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6828
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	/*
	 * Enable or disable the CP ECC-error interrupt across every CP
	 * interrupt control register: the global CP/CPC controls, the three
	 * gfx rings, and all eight MEC pipes.  Returns -EINVAL for an
	 * unknown state.
	 */
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}
6873
6874 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6875                                      struct amdgpu_irq_src *source,
6876                                      unsigned int type,
6877                                      enum amdgpu_interrupt_state state)
6878 {
6879         int enable_flag;
6880
6881         switch (state) {
6882         case AMDGPU_IRQ_STATE_DISABLE:
6883                 enable_flag = 1;
6884                 break;
6885
6886         case AMDGPU_IRQ_STATE_ENABLE:
6887                 enable_flag = 0;
6888                 break;
6889
6890         default:
6891                 return -EINVAL;
6892         }
6893
6894         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6895                      enable_flag);
6896
6897         return 0;
6898 }
6899
6900 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6901                             struct amdgpu_irq_src *source,
6902                             struct amdgpu_iv_entry *entry)
6903 {
6904         int i;
6905         u8 me_id, pipe_id, queue_id;
6906         struct amdgpu_ring *ring;
6907
6908         DRM_DEBUG("IH: CP EOP\n");
6909         me_id = (entry->ring_id & 0x0c) >> 2;
6910         pipe_id = (entry->ring_id & 0x03) >> 0;
6911         queue_id = (entry->ring_id & 0x70) >> 4;
6912
6913         switch (me_id) {
6914         case 0:
6915                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6916                 break;
6917         case 1:
6918         case 2:
6919                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6920                         ring = &adev->gfx.compute_ring[i];
6921                         /* Per-queue interrupt is supported for MEC starting from VI.
6922                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6923                           */
6924                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6925                                 amdgpu_fence_process(ring);
6926                 }
6927                 break;
6928         }
6929         return 0;
6930 }
6931
6932 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6933                                  struct amdgpu_irq_src *source,
6934                                  struct amdgpu_iv_entry *entry)
6935 {
6936         DRM_ERROR("Illegal register access in command stream\n");
6937         schedule_work(&adev->reset_work);
6938         return 0;
6939 }
6940
6941 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6942                                   struct amdgpu_irq_src *source,
6943                                   struct amdgpu_iv_entry *entry)
6944 {
6945         DRM_ERROR("Illegal instruction in command stream\n");
6946         schedule_work(&adev->reset_work);
6947         return 0;
6948 }
6949
/*
 * CP EDC/ECC error interrupt handler.  Log-only: the error is reported
 * but no recovery is attempted here.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        /* Add the missing trailing newline so the log line is terminated
         * instead of being merged with the next printk.
         */
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6957
6958 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6959                            struct amdgpu_irq_src *source,
6960                            struct amdgpu_iv_entry *entry)
6961 {
6962         u8 enc, se_id;
6963         char type[20];
6964
6965         /* Parse all fields according to SQ_INTERRUPT* registers */
6966         enc = (entry->src_data[0] >> 26) & 0x3;
6967         se_id = (entry->src_data[0] >> 24) & 0x3;
6968
6969         switch (enc) {
6970                 case 0:
6971                         DRM_INFO("SQ general purpose intr detected:"
6972                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6973                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6974                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6975                                         "wlt %d, thread_trace %d.\n",
6976                                         se_id,
6977                                         (entry->src_data[0] >> 7) & 0x1,
6978                                         (entry->src_data[0] >> 6) & 0x1,
6979                                         (entry->src_data[0] >> 5) & 0x1,
6980                                         (entry->src_data[0] >> 4) & 0x1,
6981                                         (entry->src_data[0] >> 3) & 0x1,
6982                                         (entry->src_data[0] >> 2) & 0x1,
6983                                         (entry->src_data[0] >> 1) & 0x1,
6984                                         entry->src_data[0] & 0x1
6985                                         );
6986                         break;
6987                 case 1:
6988                 case 2:
6989
6990                         if (enc == 1)
6991                                 sprintf(type, "instruction intr");
6992                         else
6993                                 sprintf(type, "EDC/ECC error");
6994
6995                         DRM_INFO(
6996                                 "SQ %s detected: "
6997                                         "se_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d\n",
6998                                         type, se_id,
6999                                         (entry->src_data[0] >> 20) & 0xf,
7000                                         (entry->src_data[0] >> 18) & 0x3,
7001                                         (entry->src_data[0] >> 14) & 0xf,
7002                                         (entry->src_data[0] >> 10) & 0xf
7003                                         );
7004                         break;
7005                 default:
7006                         DRM_ERROR("SQ invalid encoding type\n.");
7007                         return -EINVAL;
7008         }
7009
7010         return 0;
7011 }
7012
7013 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7014                                             struct amdgpu_irq_src *src,
7015                                             unsigned int type,
7016                                             enum amdgpu_interrupt_state state)
7017 {
7018         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7019
7020         switch (type) {
7021         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7022                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7023                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7024                 if (ring->me == 1)
7025                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7026                                      ring->pipe,
7027                                      GENERIC2_INT_ENABLE,
7028                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7029                 else
7030                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7031                                      ring->pipe,
7032                                      GENERIC2_INT_ENABLE,
7033                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7034                 break;
7035         default:
7036                 BUG(); /* kiq only support GENERIC2_INT now */
7037                 break;
7038         }
7039         return 0;
7040 }
7041
7042 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7043                             struct amdgpu_irq_src *source,
7044                             struct amdgpu_iv_entry *entry)
7045 {
7046         u8 me_id, pipe_id, queue_id;
7047         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7048
7049         me_id = (entry->ring_id & 0x0c) >> 2;
7050         pipe_id = (entry->ring_id & 0x03) >> 0;
7051         queue_id = (entry->ring_id & 0x70) >> 4;
7052         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7053                    me_id, pipe_id, queue_id);
7054
7055         amdgpu_fence_process(ring);
7056         return 0;
7057 }
7058
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini,
 * suspend/resume, idle checks, soft reset, and clock/power gating).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7079
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7123
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .set_priority = gfx_v8_0_ring_set_priority_compute,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7153
/* Ring callbacks for the kernel interface queue (KIQ) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7179
7180 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7181 {
7182         int i;
7183
7184         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7185
7186         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7187                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7188
7189         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7190                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7191 }
7192
/* EOP interrupt: drives fence processing on gfx/compute rings. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
7197
/* Privileged-register fault: logged, then a GPU reset is scheduled. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
7202
/* Privileged-instruction fault: logged, then a GPU reset is scheduled. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
7207
/* KIQ GENERIC2 interrupt: fence processing on the KIQ ring. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
7212
/* CP EDC/ECC error interrupt: log-only handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v8_0_set_cp_ecc_int_state,
        .process = gfx_v8_0_cp_ecc_error_irq,
};
7217
/* SQ interrupt: decodes and logs SQ interrupt payloads. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
        .set = gfx_v8_0_set_sq_int_state,
        .process = gfx_v8_0_sq_irq,
};
7222
7223 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7224 {
7225         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7226         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7227
7228         adev->gfx.priv_reg_irq.num_types = 1;
7229         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7230
7231         adev->gfx.priv_inst_irq.num_types = 1;
7232         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7233
7234         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7235         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7236
7237         adev->gfx.cp_ecc_error_irq.num_types = 1;
7238         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7239
7240         adev->gfx.sq_irq.num_types = 1;
7241         adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7242 }
7243
/* Hook up the RLC callback table (iceland_rlc_funcs, defined elsewhere
 * in this file) for this GFX v8 device.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7248
/* Initialize ASIC GDS (global data share) sizing: total sizes come from
 * hardware (mmGDS_VMID0_SIZE) / fixed limits, partition sizes depend on
 * whether the part exposes a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                /* 64KB GDS parts: larger memory partitions, smaller
                 * GWS/OA partitions.
                 */
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7276
7277 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7278                                                  u32 bitmap)
7279 {
7280         u32 data;
7281
7282         if (!bitmap)
7283                 return;
7284
7285         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7286         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7287
7288         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7289 }
7290
7291 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7292 {
7293         u32 data, mask;
7294
7295         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7296                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7297
7298         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7299
7300         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7301 }
7302
/* Walk every SE/SH, apply user CU-disable masks, and populate
 * adev->gfx.cu_info: per-SH active-CU bitmaps, the always-on CU
 * bitmap/mask, and fixed per-CU capability numbers.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];  /* one mask per (SE, SH), up to 4x2 */
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs cap the always-on CU count at 2; dGPUs allow all CUs per SH. */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        /* grbm_idx_mutex serializes GRBM SE/SH selection with other users. */
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* disable_masks only covers a 4x2 SE/SH grid */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first ao_cu_num of them are
                         * marked always-on.
                         */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* ao_cu_mask is 32 bits: 8 bits per SH, 2 SEs x 2 SHs
                         * (hence the narrower i < 2 bound here).
                         */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* Restore broadcast selection before releasing the mutex. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
        /* Fixed GFX8 per-CU capabilities. */
        cu_info->max_waves_per_simd = 10;
        cu_info->max_scratch_slots_per_cu = 32;
        cu_info->wave_front_size = 64;
        cu_info->lds_size = 64;
}
7358
/* IP block descriptor for GFX 8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7367
/* IP block descriptor for GFX 8.1 parts (shares the 8.0 callbacks). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7376
7377 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7378 {
7379         uint64_t ce_payload_addr;
7380         int cnt_ce;
7381         union {
7382                 struct vi_ce_ib_state regular;
7383                 struct vi_ce_ib_state_chained_ib chained;
7384         } ce_payload = {};
7385
7386         if (ring->adev->virt.chained_ib_support) {
7387                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7388                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7389                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7390         } else {
7391                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7392                         offsetof(struct vi_gfx_meta_data, ce_payload);
7393                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7394         }
7395
7396         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7397         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7398                                 WRITE_DATA_DST_SEL(8) |
7399                                 WR_CONFIRM) |
7400                                 WRITE_DATA_CACHE_POLICY(0));
7401         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7402         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7403         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7404 }
7405
7406 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7407 {
7408         uint64_t de_payload_addr, gds_addr, csa_addr;
7409         int cnt_de;
7410         union {
7411                 struct vi_de_ib_state regular;
7412                 struct vi_de_ib_state_chained_ib chained;
7413         } de_payload = {};
7414
7415         csa_addr = amdgpu_csa_vaddr(ring->adev);
7416         gds_addr = csa_addr + 4096;
7417         if (ring->adev->virt.chained_ib_support) {
7418                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7419                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7420                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7421                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7422         } else {
7423                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7424                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7425                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7426                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7427         }
7428
7429         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7430         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7431                                 WRITE_DATA_DST_SEL(8) |
7432                                 WR_CONFIRM) |
7433                                 WRITE_DATA_CACHE_POLICY(0));
7434         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7435         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7436         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7437 }
This page took 0.489124 seconds and 4 git commands to generate.