/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c — GFX v8 (VI-family) graphics engine support */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_MEC_HPD_SIZE 2048
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address*/
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
142 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147
148 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
149 {
150         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
151         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
152         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
153         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
154         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
155         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
156         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
157         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
158         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
159         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
160         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
161         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
162         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
163         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
164         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
165         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
166 };
167
168 static const u32 golden_settings_tonga_a11[] =
169 {
170         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
171         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
172         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
173         mmGB_GPU_ID, 0x0000000f, 0x00000000,
174         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
175         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
176         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
177         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
178         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
179         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
180         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
181         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
182         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
183         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
184         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
185         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
186 };
187
188 static const u32 tonga_golden_common_all[] =
189 {
190         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
191         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
192         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
193         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
194         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
195         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
196         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
197         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
198 };
199
200 static const u32 tonga_mgcg_cgcg_init[] =
201 {
202         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
203         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
204         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
209         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
211         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
212         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
213         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
216         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
219         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
220         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
221         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
222         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
223         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
224         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
225         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
226         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
227         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
228         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
229         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
231         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
232         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
233         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
234         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
235         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
236         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
237         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
238         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
239         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
240         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
241         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
242         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
243         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
244         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
245         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
246         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
247         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
248         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
249         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
250         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
251         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
252         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
253         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
254         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
255         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
256         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
257         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
258         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
261         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
266         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
271         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
274         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
275         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
276         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
277 };
278
279 static const u32 golden_settings_polaris11_a11[] =
280 {
281         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
282         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
283         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
284         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
285         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
286         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
287         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
288         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
289         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
290         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
291         mmSQ_CONFIG, 0x07f80000, 0x01180000,
292         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
293         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
294         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
295         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
296         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
297         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
298 };
299
300 static const u32 polaris11_golden_common_all[] =
301 {
302         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
303         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
304         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
305         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
306         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
307         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
308 };
309
310 static const u32 golden_settings_polaris10_a11[] =
311 {
312         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
313         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
314         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
315         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
316         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
317         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
318         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
319         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
320         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
321         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
322         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
323         mmSQ_CONFIG, 0x07f80000, 0x07180000,
324         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
325         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
326         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
327         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
328         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
329 };
330
331 static const u32 polaris10_golden_common_all[] =
332 {
333         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
334         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
335         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
336         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
337         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
338         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
339         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
340         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
341 };
342
343 static const u32 fiji_golden_common_all[] =
344 {
345         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
347         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
348         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
349         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
350         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
351         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
352         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
353         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
354         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
355 };
356
357 static const u32 golden_settings_fiji_a10[] =
358 {
359         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
360         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
361         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
362         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
363         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
364         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
365         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
366         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
367         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
368         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
369         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
370 };
371
372 static const u32 fiji_mgcg_cgcg_init[] =
373 {
374         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
375         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
376         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
381         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
383         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
384         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
385         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
386         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
387         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
388         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
391         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
392         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
393         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
394         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
395         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
396         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
397         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
398         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
399         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
400         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
401         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
402         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
403         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
404         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
405         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
406         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
407         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
408         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
409 };
410
411 static const u32 golden_settings_iceland_a11[] =
412 {
413         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
414         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
415         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
416         mmGB_GPU_ID, 0x0000000f, 0x00000000,
417         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
420         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
421         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
422         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
423         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
424         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
425         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
426         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
427         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
428         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
429 };
430
431 static const u32 iceland_golden_common_all[] =
432 {
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
443 static const u32 iceland_mgcg_cgcg_init[] =
444 {
445         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
450         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
451         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
452         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
468         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
479         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
499         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
507         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
508         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
509 };
510
511 static const u32 cz_golden_settings_a11[] =
512 {
513         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
514         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
515         mmGB_GPU_ID, 0x0000000f, 0x00000000,
516         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
517         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
518         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
519         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
520         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
521         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
522         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
523         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
524         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
525 };
526
527 static const u32 cz_golden_common_all[] =
528 {
529         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
530         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
531         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
532         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
533         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
534         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
535         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
536         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
537 };
538
539 static const u32 cz_mgcg_cgcg_init[] =
540 {
541         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
542         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
543         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
544         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
550         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
551         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
552         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
553         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
554         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
555         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
558         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
559         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
560         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
561         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
562         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
563         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
564         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
565         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
566         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
567         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
568         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
569         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
570         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
571         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
572         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
575         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
580         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
585         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
590         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
593         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
594         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
595         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
596         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
597         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
598         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
599         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
600         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
601         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
602         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
603         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
604         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
605         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
606         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
607         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
608         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
609         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
610         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
611         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
612         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
613         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
614         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
615         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
616 };
617
/*
 * Stoney golden register settings: { register, mask, value } triplets
 * consumed by amdgpu_program_register_sequence() at init time.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
631
/*
 * Stoney common golden settings: { register, mask, value } triplets
 * applied via amdgpu_program_register_sequence() (raster config,
 * address config and SPI CU resource reservation defaults).
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
643
/*
 * Stoney MGCG/CGCG (clock gating) init values: { register, mask, value }
 * triplets applied via amdgpu_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
652
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
661
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values.
 *
 * Applies the clock-gating init, golden settings and common register
 * sequences appropriate for the detected ASIC.  Sequence order matters:
 * tables are programmed in the order listed for each chip.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk: a few Polaris10 boards (matched by
		 * PCI revision/subsystem IDs) need extra i2c transactions
		 * at init; presumably VBIOS/PMIC tuning — see the git log
		 * of this file for the originating change.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
750
751 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
752 {
753         adev->gfx.scratch.num_reg = 7;
754         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
755         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
756 }
757
/*
 * gfx_v8_0_ring_test_ring - basic liveness test for a CP ring.
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a three-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it and polls until
 * the write lands or adev->usec_timeout microseconds elapse.
 *
 * Returns 0 on success, negative error code on scratch/ring allocation
 * failure or timeout.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known sentinel value so we can detect the CP write below. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* SET_UCONFIG_REG: have the CP write 0xDEADBEEF to the scratch reg. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Poll (1us granularity) until the packet executes or we time out. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
801
/*
 * gfx_v8_0_ring_test_ib - sanity test for indirect buffer submission.
 *
 * Same idea as the ring test, but the SET_UCONFIG_REG write of
 * 0xDEADBEEF is placed in an IB that is scheduled on @ring; the result
 * is awaited via the returned fence with @timeout (jiffies).
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, or a
 * negative error code on allocation/scheduling/fence failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Sentinel value; overwritten by the IB if execution succeeds. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* Three-dword SET_UCONFIG_REG packet writing 0xDEADBEEF to scratch. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
857
858
859 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
860 {
861         release_firmware(adev->gfx.pfp_fw);
862         adev->gfx.pfp_fw = NULL;
863         release_firmware(adev->gfx.me_fw);
864         adev->gfx.me_fw = NULL;
865         release_firmware(adev->gfx.ce_fw);
866         adev->gfx.ce_fw = NULL;
867         release_firmware(adev->gfx.rlc_fw);
868         adev->gfx.rlc_fw = NULL;
869         release_firmware(adev->gfx.mec_fw);
870         adev->gfx.mec_fw = NULL;
871         if ((adev->asic_type != CHIP_STONEY) &&
872             (adev->asic_type != CHIP_TOPAZ))
873                 release_firmware(adev->gfx.mec2_fw);
874         adev->gfx.mec2_fw = NULL;
875
876         kfree(adev->gfx.rlc.register_list_format);
877 }
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.me_fw);
937         if (err)
938                 goto out;
939         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943
944         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
945         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
946         if (err)
947                 goto out;
948         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
949         if (err)
950                 goto out;
951         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
952         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
953         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
954
955         /*
956          * Support for MCBP/Virtualization in combination with chained IBs is
957          * formal released on feature version #46
958          */
959         if (adev->gfx.ce_feature_version >= 46 &&
960             adev->gfx.pfp_feature_version >= 46) {
961                 adev->virt.chained_ib_support = true;
962                 DRM_INFO("Chained IB support enabled!\n");
963         } else
964                 adev->virt.chained_ib_support = false;
965
966         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
967         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
971         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
972         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
973         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
974
975         adev->gfx.rlc.save_and_restore_offset =
976                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
977         adev->gfx.rlc.clear_state_descriptor_offset =
978                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
979         adev->gfx.rlc.avail_scratch_ram_locations =
980                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
981         adev->gfx.rlc.reg_restore_list_size =
982                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
983         adev->gfx.rlc.reg_list_format_start =
984                         le32_to_cpu(rlc_hdr->reg_list_format_start);
985         adev->gfx.rlc.reg_list_format_separate_start =
986                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
987         adev->gfx.rlc.starting_offsets_start =
988                         le32_to_cpu(rlc_hdr->starting_offsets_start);
989         adev->gfx.rlc.reg_list_format_size_bytes =
990                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
991         adev->gfx.rlc.reg_list_size_bytes =
992                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
993
994         adev->gfx.rlc.register_list_format =
995                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
996                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
997
998         if (!adev->gfx.rlc.register_list_format) {
999                 err = -ENOMEM;
1000                 goto out;
1001         }
1002
1003         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1004                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1005         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1006                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1007
1008         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1009
1010         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1011                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1012         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014
1015         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1016         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1023         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if ((adev->asic_type != CHIP_STONEY) &&
1027             (adev->asic_type != CHIP_TOPAZ)) {
1028                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1029                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1030                 if (!err) {
1031                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032                         if (err)
1033                                 goto out;
1034                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1035                                 adev->gfx.mec2_fw->data;
1036                         adev->gfx.mec2_fw_version =
1037                                 le32_to_cpu(cp_hdr->header.ucode_version);
1038                         adev->gfx.mec2_feature_version =
1039                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1040                 } else {
1041                         err = 0;
1042                         adev->gfx.mec2_fw = NULL;
1043                 }
1044         }
1045
1046         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1048                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1049                 info->fw = adev->gfx.pfp_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1056                 info->fw = adev->gfx.me_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1062                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1063                 info->fw = adev->gfx.ce_fw;
1064                 header = (const struct common_firmware_header *)info->fw->data;
1065                 adev->firmware.fw_size +=
1066                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1067
1068                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1069                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1070                 info->fw = adev->gfx.rlc_fw;
1071                 header = (const struct common_firmware_header *)info->fw->data;
1072                 adev->firmware.fw_size +=
1073                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1074
1075                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1076                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1077                 info->fw = adev->gfx.mec_fw;
1078                 header = (const struct common_firmware_header *)info->fw->data;
1079                 adev->firmware.fw_size +=
1080                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081
1082                 /* we need account JT in */
1083                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1084                 adev->firmware.fw_size +=
1085                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1086
1087                 if (amdgpu_sriov_vf(adev)) {
1088                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1089                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1090                         info->fw = adev->gfx.mec_fw;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1093                 }
1094
1095                 if (adev->gfx.mec2_fw) {
1096                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1097                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1098                         info->fw = adev->gfx.mec2_fw;
1099                         header = (const struct common_firmware_header *)info->fw->data;
1100                         adev->firmware.fw_size +=
1101                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1102                 }
1103
1104         }
1105
1106 out:
1107         if (err) {
1108                 dev_err(adev->dev,
1109                         "gfx8: Failed to load firmware \"%s\"\n",
1110                         fw_name);
1111                 release_firmware(adev->gfx.pfp_fw);
1112                 adev->gfx.pfp_fw = NULL;
1113                 release_firmware(adev->gfx.me_fw);
1114                 adev->gfx.me_fw = NULL;
1115                 release_firmware(adev->gfx.ce_fw);
1116                 adev->gfx.ce_fw = NULL;
1117                 release_firmware(adev->gfx.rlc_fw);
1118                 adev->gfx.rlc_fw = NULL;
1119                 release_firmware(adev->gfx.mec_fw);
1120                 adev->gfx.mec_fw = NULL;
1121                 release_firmware(adev->gfx.mec2_fw);
1122                 adev->gfx.mec2_fw = NULL;
1123         }
1124         return err;
1125 }
1126
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer (CSB).
 *
 * Emits a PM4 command stream into @buffer: preamble begin, context
 * control, all SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, the raster config pair, preamble end and a
 * CLEAR_STATE packet.  All dwords are stored little-endian.  @buffer
 * must hold at least gfx_v8_0_get_csb_size(adev) dwords.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* One SET_CONTEXT_REG packet per extent; only SECT_CONTEXT
	 * sections are expected — anything else aborts the build. */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Raster config values as probed for this chip's RB layout. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1173
/*
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp_table BO.
 *
 * For each micro engine (CE, PFP, ME, MEC, plus MEC2 on Carrizo), reads
 * the jump table location (jt_offset/jt_size, in dwords) from that
 * engine's firmware header and copies the table into consecutive slots
 * of adev->gfx.rlc.cp_table_ptr.  Requires the corresponding firmware
 * images to be loaded (cp_table_ptr mapped by the caller).
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo also carries a MEC2 jump table. */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* me index selects the source firmware image:
		 * 0=CE, 1=PFP, 2=ME, 3=MEC, 4=MEC2.  Every me < max_me
		 * hits one branch, so fw_data/table_* are always set. */
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* Copy the table dword-by-dword, normalizing endianness. */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1238
1239 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1240 {
1241         int r;
1242
1243         /* clear state block */
1244         if (adev->gfx.rlc.clear_state_obj) {
1245                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1246                 if (unlikely(r != 0))
1247                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1248                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1249                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1250                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1251                 adev->gfx.rlc.clear_state_obj = NULL;
1252         }
1253
1254         /* jump table block */
1255         if (adev->gfx.rlc.cp_table_obj) {
1256                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
1257                 if (unlikely(r != 0))
1258                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1259                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1260                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1261                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1262                 adev->gfx.rlc.cp_table_obj = NULL;
1263         }
1264 }
1265
/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC buffer objects
 *
 * Creates, pins (VRAM) and CPU-maps the clear state block BO and fills
 * it via gfx_v8_0_get_csb_buffer().  On Carrizo and Stoney it
 * additionally creates the CP jump table BO and populates it via
 * cz_init_cp_jump_table().
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block: size in dwords comes from the cs_data tables */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		/* allocate only once; re-init reuses the existing BO */
		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM so the RLC can fetch it; gpu address is recorded */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	/* CP jump table: only the APUs (CZ/ST) need it */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		/*
		 * NOTE(review): unlike the clear-state path above, the error
		 * paths below do not call gfx_v8_0_rlc_fini(); the BO is left
		 * for the eventual teardown to release — confirm intentional.
		 */
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		/* copy the CP microcode jump tables into the mapped BO */
		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1363
1364 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1365 {
1366         int r;
1367
1368         if (adev->gfx.mec.hpd_eop_obj) {
1369                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
1370                 if (unlikely(r != 0))
1371                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1372                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1373                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1374                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1375                 adev->gfx.mec.hpd_eop_obj = NULL;
1376         }
1377 }
1378
1379 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1380 {
1381         int r;
1382         u32 *hpd;
1383         size_t mec_hpd_size;
1384
1385         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1386
1387         /* take ownership of the relevant compute queues */
1388         amdgpu_gfx_compute_queue_acquire(adev);
1389
1390         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1391
1392         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1393                 r = amdgpu_bo_create(adev,
1394                                      mec_hpd_size,
1395                                      PAGE_SIZE, true,
1396                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1397                                      &adev->gfx.mec.hpd_eop_obj);
1398                 if (r) {
1399                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1400                         return r;
1401                 }
1402         }
1403
1404         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1405         if (unlikely(r != 0)) {
1406                 gfx_v8_0_mec_fini(adev);
1407                 return r;
1408         }
1409         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1410                           &adev->gfx.mec.hpd_eop_gpu_addr);
1411         if (r) {
1412                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1413                 gfx_v8_0_mec_fini(adev);
1414                 return r;
1415         }
1416         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1417         if (r) {
1418                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1419                 gfx_v8_0_mec_fini(adev);
1420                 return r;
1421         }
1422
1423         memset(hpd, 0, mec_hpd_size);
1424
1425         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1426         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1427
1428         return 0;
1429 }
1430
/*
 * Hand-assembled GCN shader binary used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs before EDC is
 * (re)enabled.  Opaque machine code — do not edit by hand; regenerate
 * from the shader source if a change is needed.
 * (Presumably v_mov_b32 moves terminated by s_barrier/s_endpgm —
 * encodings not decoded here; confirm against the GCN3 ISA manual.)
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1467
/*
 * Hand-assembled GCN shader binary used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize SGPRs (dispatched
 * twice, once per sgpr*_init_regs table).  Opaque machine code — do
 * not edit by hand; confirm encodings against the GCN3 ISA manual.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1492
/*
 * (register, value) pairs written via PACKET3_SET_SH_REG before the
 * VGPR init shader dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 * Consumers iterate this table two entries at a time.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1512
/*
 * (register, value) pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs
 * only in the SE0 static thread management mask (0x0f vs 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1532
/*
 * (register, value) pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr1_init_regs
 * only in the SE0 static thread management mask (0xf0 vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1552
/*
 * EDC SEC/DED counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1581
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs before enabling EDC
 *
 * Carrizo only.  Temporarily disables GB_EDC_MODE, builds a single
 * indirect buffer that runs one VGPR-initializing and two
 * SGPR-initializing compute dispatches, waits for completion, then
 * re-enables EDC (DED_MODE/PROP_FED) and reads back the SEC/DED
 * counter registers to clear them.
 *
 * Returns 0 on success (or when not applicable), negative error code
 * on IB allocation/submit or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * IB size: per dispatch, 3 dwords for each reg pair, 4 for the
	 * PGM_LO/HI write, 5 for the dispatch packet and 2 for the CS
	 * partial flush; shader code follows at 256-byte alignment.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with double-error detection and error propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	/* NOTE(review): clears DIS_EDC then ORs in bit 0 — confirm intent */
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1744
1745 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1746 {
1747         u32 gb_addr_config;
1748         u32 mc_shared_chmap, mc_arb_ramcfg;
1749         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1750         u32 tmp;
1751         int ret;
1752
1753         switch (adev->asic_type) {
1754         case CHIP_TOPAZ:
1755                 adev->gfx.config.max_shader_engines = 1;
1756                 adev->gfx.config.max_tile_pipes = 2;
1757                 adev->gfx.config.max_cu_per_sh = 6;
1758                 adev->gfx.config.max_sh_per_se = 1;
1759                 adev->gfx.config.max_backends_per_se = 2;
1760                 adev->gfx.config.max_texture_channel_caches = 2;
1761                 adev->gfx.config.max_gprs = 256;
1762                 adev->gfx.config.max_gs_threads = 32;
1763                 adev->gfx.config.max_hw_contexts = 8;
1764
1765                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1766                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1767                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1768                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1769                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1770                 break;
1771         case CHIP_FIJI:
1772                 adev->gfx.config.max_shader_engines = 4;
1773                 adev->gfx.config.max_tile_pipes = 16;
1774                 adev->gfx.config.max_cu_per_sh = 16;
1775                 adev->gfx.config.max_sh_per_se = 1;
1776                 adev->gfx.config.max_backends_per_se = 4;
1777                 adev->gfx.config.max_texture_channel_caches = 16;
1778                 adev->gfx.config.max_gprs = 256;
1779                 adev->gfx.config.max_gs_threads = 32;
1780                 adev->gfx.config.max_hw_contexts = 8;
1781
1782                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1783                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1784                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1785                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1786                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1787                 break;
1788         case CHIP_POLARIS11:
1789         case CHIP_POLARIS12:
1790                 ret = amdgpu_atombios_get_gfx_info(adev);
1791                 if (ret)
1792                         return ret;
1793                 adev->gfx.config.max_gprs = 256;
1794                 adev->gfx.config.max_gs_threads = 32;
1795                 adev->gfx.config.max_hw_contexts = 8;
1796
1797                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1802                 break;
1803         case CHIP_POLARIS10:
1804                 ret = amdgpu_atombios_get_gfx_info(adev);
1805                 if (ret)
1806                         return ret;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         case CHIP_TONGA:
1818                 adev->gfx.config.max_shader_engines = 4;
1819                 adev->gfx.config.max_tile_pipes = 8;
1820                 adev->gfx.config.max_cu_per_sh = 8;
1821                 adev->gfx.config.max_sh_per_se = 1;
1822                 adev->gfx.config.max_backends_per_se = 2;
1823                 adev->gfx.config.max_texture_channel_caches = 8;
1824                 adev->gfx.config.max_gprs = 256;
1825                 adev->gfx.config.max_gs_threads = 32;
1826                 adev->gfx.config.max_hw_contexts = 8;
1827
1828                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833                 break;
1834         case CHIP_CARRIZO:
1835                 adev->gfx.config.max_shader_engines = 1;
1836                 adev->gfx.config.max_tile_pipes = 2;
1837                 adev->gfx.config.max_sh_per_se = 1;
1838                 adev->gfx.config.max_backends_per_se = 2;
1839                 adev->gfx.config.max_cu_per_sh = 8;
1840                 adev->gfx.config.max_texture_channel_caches = 2;
1841                 adev->gfx.config.max_gprs = 256;
1842                 adev->gfx.config.max_gs_threads = 32;
1843                 adev->gfx.config.max_hw_contexts = 8;
1844
1845                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1850                 break;
1851         case CHIP_STONEY:
1852                 adev->gfx.config.max_shader_engines = 1;
1853                 adev->gfx.config.max_tile_pipes = 2;
1854                 adev->gfx.config.max_sh_per_se = 1;
1855                 adev->gfx.config.max_backends_per_se = 1;
1856                 adev->gfx.config.max_cu_per_sh = 3;
1857                 adev->gfx.config.max_texture_channel_caches = 2;
1858                 adev->gfx.config.max_gprs = 256;
1859                 adev->gfx.config.max_gs_threads = 16;
1860                 adev->gfx.config.max_hw_contexts = 8;
1861
1862                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1867                 break;
1868         default:
1869                 adev->gfx.config.max_shader_engines = 2;
1870                 adev->gfx.config.max_tile_pipes = 4;
1871                 adev->gfx.config.max_cu_per_sh = 2;
1872                 adev->gfx.config.max_sh_per_se = 1;
1873                 adev->gfx.config.max_backends_per_se = 2;
1874                 adev->gfx.config.max_texture_channel_caches = 4;
1875                 adev->gfx.config.max_gprs = 256;
1876                 adev->gfx.config.max_gs_threads = 32;
1877                 adev->gfx.config.max_hw_contexts = 8;
1878
1879                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1880                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1881                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1882                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1883                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1884                 break;
1885         }
1886
1887         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1888         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1889         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1890
1891         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1892         adev->gfx.config.mem_max_burst_length_bytes = 256;
1893         if (adev->flags & AMD_IS_APU) {
1894                 /* Get memory bank mapping mode. */
1895                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1896                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1897                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1898
1899                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1900                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1901                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1902
1903                 /* Validate settings in case only one DIMM installed. */
1904                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1905                         dimm00_addr_map = 0;
1906                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1907                         dimm01_addr_map = 0;
1908                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1909                         dimm10_addr_map = 0;
1910                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1911                         dimm11_addr_map = 0;
1912
1913                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1914                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1915                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1916                         adev->gfx.config.mem_row_size_in_kb = 2;
1917                 else
1918                         adev->gfx.config.mem_row_size_in_kb = 1;
1919         } else {
1920                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1921                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1922                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1923                         adev->gfx.config.mem_row_size_in_kb = 4;
1924         }
1925
1926         adev->gfx.config.shader_engine_tile_size = 32;
1927         adev->gfx.config.num_gpus = 1;
1928         adev->gfx.config.multi_gpu_tile_size = 64;
1929
1930         /* fix up row size */
1931         switch (adev->gfx.config.mem_row_size_in_kb) {
1932         case 1:
1933         default:
1934                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1935                 break;
1936         case 2:
1937                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1938                 break;
1939         case 4:
1940                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1941                 break;
1942         }
1943         adev->gfx.config.gb_addr_config = gb_addr_config;
1944
1945         return 0;
1946 }
1947
1948 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1949                                         int mec, int pipe, int queue)
1950 {
1951         int r;
1952         unsigned irq_type;
1953         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1954
1955         ring = &adev->gfx.compute_ring[ring_id];
1956
1957         /* mec0 is me1 */
1958         ring->me = mec + 1;
1959         ring->pipe = pipe;
1960         ring->queue = queue;
1961
1962         ring->ring_obj = NULL;
1963         ring->use_doorbell = true;
1964         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1965         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1966                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1967         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1968
1969         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1970                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1971                 + ring->pipe;
1972
1973         /* type-2 packets are deprecated on MEC, use type-3 instead */
1974         r = amdgpu_ring_init(adev, ring, 1024,
1975                         &adev->gfx.eop_irq, irq_type);
1976         if (r)
1977                 return r;
1978
1979
1980         return 0;
1981 }
1982
1983 static int gfx_v8_0_sw_init(void *handle)
1984 {
1985         int i, j, k, r, ring_id;
1986         struct amdgpu_ring *ring;
1987         struct amdgpu_kiq *kiq;
1988         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1989
1990         switch (adev->asic_type) {
1991         case CHIP_FIJI:
1992         case CHIP_TONGA:
1993         case CHIP_POLARIS11:
1994         case CHIP_POLARIS12:
1995         case CHIP_POLARIS10:
1996         case CHIP_CARRIZO:
1997                 adev->gfx.mec.num_mec = 2;
1998                 break;
1999         case CHIP_TOPAZ:
2000         case CHIP_STONEY:
2001         default:
2002                 adev->gfx.mec.num_mec = 1;
2003                 break;
2004         }
2005
2006         adev->gfx.mec.num_pipe_per_mec = 4;
2007         adev->gfx.mec.num_queue_per_pipe = 8;
2008
2009         /* KIQ event */
2010         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2011         if (r)
2012                 return r;
2013
2014         /* EOP Event */
2015         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2016         if (r)
2017                 return r;
2018
2019         /* Privileged reg */
2020         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2021                               &adev->gfx.priv_reg_irq);
2022         if (r)
2023                 return r;
2024
2025         /* Privileged inst */
2026         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2027                               &adev->gfx.priv_inst_irq);
2028         if (r)
2029                 return r;
2030
2031         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2032
2033         gfx_v8_0_scratch_init(adev);
2034
2035         r = gfx_v8_0_init_microcode(adev);
2036         if (r) {
2037                 DRM_ERROR("Failed to load gfx firmware!\n");
2038                 return r;
2039         }
2040
2041         r = gfx_v8_0_rlc_init(adev);
2042         if (r) {
2043                 DRM_ERROR("Failed to init rlc BOs!\n");
2044                 return r;
2045         }
2046
2047         r = gfx_v8_0_mec_init(adev);
2048         if (r) {
2049                 DRM_ERROR("Failed to init MEC BOs!\n");
2050                 return r;
2051         }
2052
2053         /* set up the gfx ring */
2054         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2055                 ring = &adev->gfx.gfx_ring[i];
2056                 ring->ring_obj = NULL;
2057                 sprintf(ring->name, "gfx");
2058                 /* no gfx doorbells on iceland */
2059                 if (adev->asic_type != CHIP_TOPAZ) {
2060                         ring->use_doorbell = true;
2061                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2062                 }
2063
2064                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2065                                      AMDGPU_CP_IRQ_GFX_EOP);
2066                 if (r)
2067                         return r;
2068         }
2069
2070
2071         /* set up the compute queues - allocate horizontally across pipes */
2072         ring_id = 0;
2073         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2074                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2075                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2076                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2077                                         continue;
2078
2079                                 r = gfx_v8_0_compute_ring_init(adev,
2080                                                                 ring_id,
2081                                                                 i, k, j);
2082                                 if (r)
2083                                         return r;
2084
2085                                 ring_id++;
2086                         }
2087                 }
2088         }
2089
2090         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2091         if (r) {
2092                 DRM_ERROR("Failed to init KIQ BOs!\n");
2093                 return r;
2094         }
2095
2096         kiq = &adev->gfx.kiq;
2097         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2098         if (r)
2099                 return r;
2100
2101         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2102         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2103         if (r)
2104                 return r;
2105
2106         /* reserve GDS, GWS and OA resource for gfx */
2107         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2108                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2109                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2110         if (r)
2111                 return r;
2112
2113         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2114                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2115                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2116         if (r)
2117                 return r;
2118
2119         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2120                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2121                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2122         if (r)
2123                 return r;
2124
2125         adev->gfx.ce_ram_size = 0x8000;
2126
2127         r = gfx_v8_0_gpu_early_init(adev);
2128         if (r)
2129                 return r;
2130
2131         return 0;
2132 }
2133
2134 static int gfx_v8_0_sw_fini(void *handle)
2135 {
2136         int i;
2137         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2138
2139         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2140         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2141         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2142
2143         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2144                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2145         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2146                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2147
2148         amdgpu_gfx_compute_mqd_sw_fini(adev);
2149         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2150         amdgpu_gfx_kiq_fini(adev);
2151
2152         gfx_v8_0_mec_fini(adev);
2153         gfx_v8_0_rlc_fini(adev);
2154         gfx_v8_0_free_microcode(adev);
2155
2156         return 0;
2157 }
2158
2159 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2160 {
2161         uint32_t *modearray, *mod2array;
2162         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2163         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2164         u32 reg_offset;
2165
2166         modearray = adev->gfx.config.tile_mode_array;
2167         mod2array = adev->gfx.config.macrotile_mode_array;
2168
2169         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2170                 modearray[reg_offset] = 0;
2171
2172         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2173                 mod2array[reg_offset] = 0;
2174
2175         switch (adev->asic_type) {
2176         case CHIP_TOPAZ:
2177                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2178                                 PIPE_CONFIG(ADDR_SURF_P2) |
2179                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                 PIPE_CONFIG(ADDR_SURF_P2) |
2183                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2185                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2186                                 PIPE_CONFIG(ADDR_SURF_P2) |
2187                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2188                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2189                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190                                 PIPE_CONFIG(ADDR_SURF_P2) |
2191                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2192                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2193                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194                                 PIPE_CONFIG(ADDR_SURF_P2) |
2195                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2196                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2197                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198                                 PIPE_CONFIG(ADDR_SURF_P2) |
2199                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2200                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2201                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202                                 PIPE_CONFIG(ADDR_SURF_P2) |
2203                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2205                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2206                                 PIPE_CONFIG(ADDR_SURF_P2));
2207                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2208                                 PIPE_CONFIG(ADDR_SURF_P2) |
2209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2210                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2216                                  PIPE_CONFIG(ADDR_SURF_P2) |
2217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2219                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2220                                  PIPE_CONFIG(ADDR_SURF_P2) |
2221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2223                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                  PIPE_CONFIG(ADDR_SURF_P2) |
2225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2227                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2228                                  PIPE_CONFIG(ADDR_SURF_P2) |
2229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232                                  PIPE_CONFIG(ADDR_SURF_P2) |
2233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2235                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2236                                  PIPE_CONFIG(ADDR_SURF_P2) |
2237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2240                                  PIPE_CONFIG(ADDR_SURF_P2) |
2241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2244                                  PIPE_CONFIG(ADDR_SURF_P2) |
2245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2247                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2248                                  PIPE_CONFIG(ADDR_SURF_P2) |
2249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2251                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2252                                  PIPE_CONFIG(ADDR_SURF_P2) |
2253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2255                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2256                                  PIPE_CONFIG(ADDR_SURF_P2) |
2257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2259                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2260                                  PIPE_CONFIG(ADDR_SURF_P2) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2263                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2264                                  PIPE_CONFIG(ADDR_SURF_P2) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2267                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P2) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                  PIPE_CONFIG(ADDR_SURF_P2) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2275                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2276                                  PIPE_CONFIG(ADDR_SURF_P2) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2279
2280                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283                                 NUM_BANKS(ADDR_SURF_8_BANK));
2284                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2287                                 NUM_BANKS(ADDR_SURF_8_BANK));
2288                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2291                                 NUM_BANKS(ADDR_SURF_8_BANK));
2292                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2293                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295                                 NUM_BANKS(ADDR_SURF_8_BANK));
2296                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2299                                 NUM_BANKS(ADDR_SURF_8_BANK));
2300                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2303                                 NUM_BANKS(ADDR_SURF_8_BANK));
2304                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307                                 NUM_BANKS(ADDR_SURF_8_BANK));
2308                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2309                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2310                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2311                                 NUM_BANKS(ADDR_SURF_16_BANK));
2312                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2313                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2314                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2315                                 NUM_BANKS(ADDR_SURF_16_BANK));
2316                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2317                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2318                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2319                                  NUM_BANKS(ADDR_SURF_16_BANK));
2320                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2321                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2322                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2323                                  NUM_BANKS(ADDR_SURF_16_BANK));
2324                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2327                                  NUM_BANKS(ADDR_SURF_16_BANK));
2328                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2330                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2331                                  NUM_BANKS(ADDR_SURF_16_BANK));
2332                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2334                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2335                                  NUM_BANKS(ADDR_SURF_8_BANK));
2336
2337                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2338                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2339                             reg_offset != 23)
2340                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2341
2342                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2343                         if (reg_offset != 7)
2344                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2345
2346                 break;
2347         case CHIP_FIJI:
2348                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2353                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2360                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2364                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2367                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2371                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2372                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2375                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2376                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2378                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2379                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2381                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2382                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2398                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2407                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2418                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2419                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2423                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2427                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2431                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2434                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2435                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2438                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2439                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2442                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2443                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2446                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2447                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2450                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2451                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2453                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2454                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470
2471                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2474                                 NUM_BANKS(ADDR_SURF_8_BANK));
2475                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478                                 NUM_BANKS(ADDR_SURF_8_BANK));
2479                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482                                 NUM_BANKS(ADDR_SURF_8_BANK));
2483                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2485                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2486                                 NUM_BANKS(ADDR_SURF_8_BANK));
2487                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490                                 NUM_BANKS(ADDR_SURF_8_BANK));
2491                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494                                 NUM_BANKS(ADDR_SURF_8_BANK));
2495                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498                                 NUM_BANKS(ADDR_SURF_8_BANK));
2499                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2502                                 NUM_BANKS(ADDR_SURF_8_BANK));
2503                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                 NUM_BANKS(ADDR_SURF_8_BANK));
2507                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2510                                  NUM_BANKS(ADDR_SURF_8_BANK));
2511                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2514                                  NUM_BANKS(ADDR_SURF_8_BANK));
2515                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                  NUM_BANKS(ADDR_SURF_8_BANK));
2519                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                  NUM_BANKS(ADDR_SURF_8_BANK));
2523                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526                                  NUM_BANKS(ADDR_SURF_4_BANK));
2527
2528                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2529                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2530
2531                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2532                         if (reg_offset != 7)
2533                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2534
2535                 break;
2536         case CHIP_TONGA:
                /*
                 * Tonga tiling tables.  modearray[0..30] are the GB_TILE_MODEn
                 * values: each entry combines an array mode, a pipe config
                 * (ADDR_SURF_P8_32x32_16x16 for most entries here, P4_16x16 for
                 * the PRT fallbacks), and either a tile split (depth entries) or
                 * a micro-tile mode plus sample split.  The values themselves
                 * are hardware encodings from the addrlib surface-format tables
                 * and must not be "simplified".
                 */
2537                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2540                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2544                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2545                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2548                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2549                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2552                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2553                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2556                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2557                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2560                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2561                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2564                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2565                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2568                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* entry 8 is the linear-aligned mode: no tiling fields apply */
2569                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2570                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2571                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2572                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2578                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2583                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2587                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2596                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2602                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2603                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2607                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2608                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2612                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2616                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2618                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2619                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2620                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2622                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2623                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2624                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2626                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2627                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2628                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2630                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2631                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2632                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2634                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2635                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2636                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2638                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2639                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2640                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2642                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2643                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2644                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2646                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2650                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2653                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2658                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659
                /*
                 * Macrotile (secondary) modes for GB_MACROTILE_MODEn: bank
                 * width/height, macro-tile aspect ratio and bank count per
                 * entry.  mod2array[7] is deliberately never assigned and the
                 * write loop below skips offset 7 — presumably that register is
                 * reserved/managed elsewhere; confirm against the ASIC register
                 * documentation before changing.
                 */
2660                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2662                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2663                                 NUM_BANKS(ADDR_SURF_16_BANK));
2664                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667                                 NUM_BANKS(ADDR_SURF_16_BANK));
2668                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671                                 NUM_BANKS(ADDR_SURF_16_BANK));
2672                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2675                                 NUM_BANKS(ADDR_SURF_16_BANK));
2676                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2678                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2679                                 NUM_BANKS(ADDR_SURF_16_BANK));
2680                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683                                 NUM_BANKS(ADDR_SURF_16_BANK));
2684                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687                                 NUM_BANKS(ADDR_SURF_16_BANK));
2688                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2690                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2691                                 NUM_BANKS(ADDR_SURF_16_BANK));
2692                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2695                                 NUM_BANKS(ADDR_SURF_16_BANK));
2696                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2699                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2703                                  NUM_BANKS(ADDR_SURF_16_BANK));
2704                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2707                                  NUM_BANKS(ADDR_SURF_8_BANK));
2708                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2711                                  NUM_BANKS(ADDR_SURF_4_BANK));
2712                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2714                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2715                                  NUM_BANKS(ADDR_SURF_4_BANK));
2716
                /*
                 * Program both tables into the hardware.  Loop bounds
                 * (num_tile_mode_states / num_secondary_tile_mode_states) are
                 * set earlier in this function, outside this chunk.
                 */
2717                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2718                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2719
                /* offset 7 skipped: mod2array[7] is never initialized above */
2720                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2721                         if (reg_offset != 7)
2722                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2723
2724                 break;
2725         case CHIP_POLARIS11:
2726         case CHIP_POLARIS12:
                /*
                 * Polaris11/12 share one tiling table.  Same layout as the
                 * other ASIC cases (modearray[] -> GB_TILE_MODEn,
                 * mod2array[] -> GB_MACROTILE_MODEn), but every entry uses the
                 * ADDR_SURF_P4_16x16 pipe config, unlike the CHIP_TONGA table
                 * above which uses ADDR_SURF_P8_32x32_16x16 for most entries.
                 * Values are raw hardware encodings — do not "simplify".
                 */
2727                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2735                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2739                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2743                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2751                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2755                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* entry 8 is the linear-aligned mode: no tiling fields apply */
2759                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2761                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2769                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2773                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2777                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2784                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2788                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2790                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2792                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2793                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2797                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2800                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2808                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2809                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2812                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2813                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2816                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2817                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2820                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2821                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2822                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2824                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2825                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2828                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2829                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2830                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2832                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2833                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2836                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2840                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2844                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2846                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2848                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2849
                /*
                 * Macrotile (secondary) modes.  mod2array[7] is deliberately
                 * never assigned and the write loop below skips offset 7 —
                 * presumably reserved; confirm against the ASIC register docs.
                 * Note entries 8 and 9 use BANK_WIDTH_2, unlike the Tonga
                 * table above.
                 */
2850                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2852                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2853                                 NUM_BANKS(ADDR_SURF_16_BANK));
2854
2855                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2857                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2858                                 NUM_BANKS(ADDR_SURF_16_BANK));
2859
2860                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864
2865                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2868                                 NUM_BANKS(ADDR_SURF_16_BANK));
2869
2870                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2873                                 NUM_BANKS(ADDR_SURF_16_BANK));
2874
2875                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2877                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2878                                 NUM_BANKS(ADDR_SURF_16_BANK));
2879
2880                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2886                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2887                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888                                 NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2891                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893                                 NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2897                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898                                 NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2902                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2903                                 NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908                                 NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913                                 NUM_BANKS(ADDR_SURF_8_BANK));
2914
2915                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2918                                 NUM_BANKS(ADDR_SURF_4_BANK));
2919
                /*
                 * Program both tables into the hardware.  Loop bounds
                 * (num_tile_mode_states / num_secondary_tile_mode_states) are
                 * set earlier in this function, outside this chunk.
                 */
2920                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2921                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2922
                /* offset 7 skipped: mod2array[7] is never initialized above */
2923                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2924                         if (reg_offset != 7)
2925                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2926
2927                 break;
2928         case CHIP_POLARIS10:
2929                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2941                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2945                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2949                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2953                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2957                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2959                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2961                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2963                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2971                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2979                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2992                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2995                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2999                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3002                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3014                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3015                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3018                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3019                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3020                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3022                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3023                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3024                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3026                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3027                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3030                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3031                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3034                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3035                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3038                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3046                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3050                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051
3052                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3054                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3055                                 NUM_BANKS(ADDR_SURF_16_BANK));
3056
3057                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060                                 NUM_BANKS(ADDR_SURF_16_BANK));
3061
3062                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3064                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065                                 NUM_BANKS(ADDR_SURF_16_BANK));
3066
3067                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3068                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3069                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3070                                 NUM_BANKS(ADDR_SURF_16_BANK));
3071
3072                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3075                                 NUM_BANKS(ADDR_SURF_16_BANK));
3076
3077                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3080                                 NUM_BANKS(ADDR_SURF_16_BANK));
3081
3082                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3084                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3085                                 NUM_BANKS(ADDR_SURF_16_BANK));
3086
3087                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3089                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                 NUM_BANKS(ADDR_SURF_16_BANK));
3091
3092                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095                                 NUM_BANKS(ADDR_SURF_16_BANK));
3096
3097                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3099                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100                                 NUM_BANKS(ADDR_SURF_16_BANK));
3101
3102                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105                                 NUM_BANKS(ADDR_SURF_16_BANK));
3106
3107                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110                                 NUM_BANKS(ADDR_SURF_8_BANK));
3111
3112                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115                                 NUM_BANKS(ADDR_SURF_4_BANK));
3116
3117                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120                                 NUM_BANKS(ADDR_SURF_4_BANK));
3121
3122                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3123                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3124
                /*
                 * Write the macrotile configuration; mod2array[7] is never
                 * assigned in the CHIP_POLARIS10 arm above, so index 7 is
                 * intentionally not programmed — presumably a reserved slot;
                 * TODO confirm against the GB_MACROTILE_MODE register spec.
                 */
3125                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3126                         if (reg_offset != 7)
3127                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3128
3129                 break;
3130         case CHIP_STONEY:
3131                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2) |
3133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2) |
3137                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3139                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3143                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144                                 PIPE_CONFIG(ADDR_SURF_P2) |
3145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3147                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3148                                 PIPE_CONFIG(ADDR_SURF_P2) |
3149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3151                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152                                 PIPE_CONFIG(ADDR_SURF_P2) |
3153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3155                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156                                 PIPE_CONFIG(ADDR_SURF_P2) |
3157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3159                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3160                                 PIPE_CONFIG(ADDR_SURF_P2));
3161                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3162                                 PIPE_CONFIG(ADDR_SURF_P2) |
3163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3164                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3165                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3169                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3173                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3182                                  PIPE_CONFIG(ADDR_SURF_P2) |
3183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3185                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3189                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3190                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3201                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3205                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3206                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3209                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3210                                  PIPE_CONFIG(ADDR_SURF_P2) |
3211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3213                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3214                                  PIPE_CONFIG(ADDR_SURF_P2) |
3215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3217                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3218                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3221                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3222                                  PIPE_CONFIG(ADDR_SURF_P2) |
3223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3225                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3229                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3230                                  PIPE_CONFIG(ADDR_SURF_P2) |
3231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3233
3234                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237                                 NUM_BANKS(ADDR_SURF_8_BANK));
3238                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                 NUM_BANKS(ADDR_SURF_8_BANK));
3242                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3245                                 NUM_BANKS(ADDR_SURF_8_BANK));
3246                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3249                                 NUM_BANKS(ADDR_SURF_8_BANK));
3250                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3253                                 NUM_BANKS(ADDR_SURF_8_BANK));
3254                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3257                                 NUM_BANKS(ADDR_SURF_8_BANK));
3258                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261                                 NUM_BANKS(ADDR_SURF_8_BANK));
3262                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265                                 NUM_BANKS(ADDR_SURF_16_BANK));
3266                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269                                 NUM_BANKS(ADDR_SURF_16_BANK));
3270                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3271                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3272                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273                                  NUM_BANKS(ADDR_SURF_16_BANK));
3274                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3275                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3276                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277                                  NUM_BANKS(ADDR_SURF_16_BANK));
3278                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3280                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3281                                  NUM_BANKS(ADDR_SURF_16_BANK));
3282                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3285                                  NUM_BANKS(ADDR_SURF_16_BANK));
3286                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289                                  NUM_BANKS(ADDR_SURF_8_BANK));
3290
                /*
                 * Tile-mode indices 7, 12, 17 and 23 are exactly the
                 * modearray[] entries the CHIP_STONEY arm above never assigns
                 * (it jumps 6->8, 11->13, 16->18, 22->24), so those registers
                 * are left at their reset values rather than written with
                 * stale array contents.
                 */
3291                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3292                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3293                             reg_offset != 23)
3294                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3295
                /* mod2array[7] is likewise never initialized in this arm. */
3296                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3297                         if (reg_offset != 7)
3298                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3299
3300                 break;
/* Unknown ASICs: warn, then reuse the CHIP_CARRIZO tables below. */
3301         default:
3302                 dev_warn(adev->dev,
3303                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3304                          adev->asic_type);
3305
                /* fall through - intentional, as documented by the dev_warn above */
3306         case CHIP_CARRIZO:
3307                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308                                 PIPE_CONFIG(ADDR_SURF_P2) |
3309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312                                 PIPE_CONFIG(ADDR_SURF_P2) |
3313                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3315                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316                                 PIPE_CONFIG(ADDR_SURF_P2) |
3317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3319                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                 PIPE_CONFIG(ADDR_SURF_P2) |
3321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3323                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3324                                 PIPE_CONFIG(ADDR_SURF_P2) |
3325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3327                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328                                 PIPE_CONFIG(ADDR_SURF_P2) |
3329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3331                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3332                                 PIPE_CONFIG(ADDR_SURF_P2) |
3333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3335                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3336                                 PIPE_CONFIG(ADDR_SURF_P2));
3337                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3338                                 PIPE_CONFIG(ADDR_SURF_P2) |
3339                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3340                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3341                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3349                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3353                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3357                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3361                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3365                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3377                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3378                                  PIPE_CONFIG(ADDR_SURF_P2) |
3379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3381                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3382                                  PIPE_CONFIG(ADDR_SURF_P2) |
3383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3385                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3386                                  PIPE_CONFIG(ADDR_SURF_P2) |
3387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3389                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3390                                  PIPE_CONFIG(ADDR_SURF_P2) |
3391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3393                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3394                                  PIPE_CONFIG(ADDR_SURF_P2) |
3395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3397                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3398                                  PIPE_CONFIG(ADDR_SURF_P2) |
3399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3401                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402                                  PIPE_CONFIG(ADDR_SURF_P2) |
3403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3405                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3406                                  PIPE_CONFIG(ADDR_SURF_P2) |
3407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3409
3410                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                 NUM_BANKS(ADDR_SURF_8_BANK));
3414                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                 NUM_BANKS(ADDR_SURF_8_BANK));
3418                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3419                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3420                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3421                                 NUM_BANKS(ADDR_SURF_8_BANK));
3422                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3423                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3424                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3425                                 NUM_BANKS(ADDR_SURF_8_BANK));
3426                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3428                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3429                                 NUM_BANKS(ADDR_SURF_8_BANK));
3430                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3433                                 NUM_BANKS(ADDR_SURF_8_BANK));
3434                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437                                 NUM_BANKS(ADDR_SURF_8_BANK));
3438                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3441                                 NUM_BANKS(ADDR_SURF_16_BANK));
3442                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3445                                 NUM_BANKS(ADDR_SURF_16_BANK));
3446                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3447                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3448                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3449                                  NUM_BANKS(ADDR_SURF_16_BANK));
3450                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3451                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3452                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3453                                  NUM_BANKS(ADDR_SURF_16_BANK));
3454                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3455                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3456                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3457                                  NUM_BANKS(ADDR_SURF_16_BANK));
3458                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3459                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3460                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3461                                  NUM_BANKS(ADDR_SURF_16_BANK));
3462                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3463                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3464                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3465                                  NUM_BANKS(ADDR_SURF_8_BANK));
3466
3467                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3468                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3469                             reg_offset != 23)
3470                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3471
3472                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3473                         if (reg_offset != 7)
3474                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3475
3476                 break;
3477         }
3478 }
3479
3480 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3481                                   u32 se_num, u32 sh_num, u32 instance)
3482 {
3483         u32 data;
3484
3485         if (instance == 0xffffffff)
3486                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3487         else
3488                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3489
3490         if (se_num == 0xffffffff)
3491                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3492         else
3493                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3494
3495         if (sh_num == 0xffffffff)
3496                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3497         else
3498                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3499
3500         WREG32(mmGRBM_GFX_INDEX, data);
3501 }
3502
3503 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3504 {
3505         u32 data, mask;
3506
3507         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3508                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3509
3510         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3511
3512         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513                                          adev->gfx.config.max_sh_per_se);
3514
3515         return (~data) & mask;
3516 }
3517
3518 static void
3519 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3520 {
3521         switch (adev->asic_type) {
3522         case CHIP_FIJI:
3523                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3524                           RB_XSEL2(1) | PKR_MAP(2) |
3525                           PKR_XSEL(1) | PKR_YSEL(1) |
3526                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3527                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3528                            SE_PAIR_YSEL(2);
3529                 break;
3530         case CHIP_TONGA:
3531         case CHIP_POLARIS10:
3532                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3533                           SE_XSEL(1) | SE_YSEL(1);
3534                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3535                            SE_PAIR_YSEL(2);
3536                 break;
3537         case CHIP_TOPAZ:
3538         case CHIP_CARRIZO:
3539                 *rconf |= RB_MAP_PKR0(2);
3540                 *rconf1 |= 0x0;
3541                 break;
3542         case CHIP_POLARIS11:
3543         case CHIP_POLARIS12:
3544                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3545                           SE_XSEL(1) | SE_YSEL(1);
3546                 *rconf1 |= 0x0;
3547                 break;
3548         case CHIP_STONEY:
3549                 *rconf |= 0x0;
3550                 *rconf1 |= 0x0;
3551                 break;
3552         default:
3553                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3554                 break;
3555         }
3556 }
3557
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 * for a chip with harvested (disabled) render backends.
 *
 * @adev: amdgpu device pointer
 * @raster_config: ideal PA_SC_RASTER_CONFIG value for the full chip
 * @raster_config_1: ideal PA_SC_RASTER_CONFIG_1 value for the full chip
 * @rb_mask: bitmask of the RBs that are actually enabled
 * @num_rb: number of RB pipes considered (caller clamps to 16)
 *
 * Rewrites the SE/PKR/RB mapping fields so work is only routed to render
 * backends that exist, then writes the result once per shader engine.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into per-SE masks: each SE owns the next
	 * rb_per_se consecutive bits.
	 */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair is completely harvested, point SE_PAIR_MAP at
	 * the surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the even SE of the pair this SE belongs to */
		int idx = (se / 2) * 2;

		/* One SE of this pair has no RBs at all: remap SE_MAP. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* One packer is empty: route everything to the other. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* RB_MAP_PKR0: the pair of RBs behind packer 0 */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* RB_MAP_PKR1: the pair of RBs behind packer 1 */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3666
/* Discover which render backends survived harvesting and program the
 * raster configuration accordingly; also cache the per-SE/SH values
 * for later reporting to userspace.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* number of bitmap bits each SH contributes to active_rbs */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* Gather every SE/SH's active-RB bitmap into a single word. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Fully populated chip (or no backends reported at all): broadcast
	 * the ideal config; otherwise program per-SE harvested configs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3723
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA address mode, unaligned access allowed, MTYPE_CC
	 * default mtype, and the PRIVATE_ATC bit set.
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Apply the same config to each compute VMID via SRBM indexing. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore SRBM indexing to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3768
3769 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3770 {
3771         switch (adev->asic_type) {
3772         default:
3773                 adev->gfx.config.double_offchip_lds_buf = 1;
3774                 break;
3775         case CHIP_CARRIZO:
3776         case CHIP_STONEY:
3777                 adev->gfx.config.double_offchip_lds_buf = 0;
3778                 break;
3779         }
3780 }
3781
/* One-time GFX block init: address config, tiling tables, RB setup,
 * per-VMID SH_MEM programming, and SC FIFO / SPI arbitration setup.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* mirror the same addressing config into GB, HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	/* Program SH_MEM_* for every VMID: VMID 0 gets MTYPE_UC defaults
	 * and a zero base, the others get MTYPE_NC plus the shared
	 * aperture base.
	 */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* top 16 bits of the shared aperture start address */
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* set all four PIPE_ORDER_TS fields of SPI_ARB_PRIORITY to 2 */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3862
/* Poll (up to adev->usec_timeout microseconds per register) until the
 * RLC serdes masters report idle: first the CU master of every SE/SH,
 * then the non-CU masters (SE/GC/TC0/TC1).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* hold grbm_idx_mutex while single-stepping through each SE/SH */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3892
3893 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3894                                                bool enable)
3895 {
3896         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3897
3898         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3899         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3900         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3901         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3902
3903         WREG32(mmCP_INT_CNTL_RING0, tmp);
3904 }
3905
3906 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3907 {
3908         /* csib */
3909         WREG32(mmRLC_CSIB_ADDR_HI,
3910                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3911         WREG32(mmRLC_CSIB_ADDR_LO,
3912                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3913         WREG32(mmRLC_CSIB_LENGTH,
3914                         adev->gfx.rlc.clear_state_size);
3915 }
3916
3917 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3918                                 int ind_offset,
3919                                 int list_size,
3920                                 int *unique_indices,
3921                                 int *indices_count,
3922                                 int max_indices,
3923                                 int *ind_start_offsets,
3924                                 int *offset_count,
3925                                 int max_offset)
3926 {
3927         int indices;
3928         bool new_entry = true;
3929
3930         for (; ind_offset < list_size; ind_offset++) {
3931
3932                 if (new_entry) {
3933                         new_entry = false;
3934                         ind_start_offsets[*offset_count] = ind_offset;
3935                         *offset_count = *offset_count + 1;
3936                         BUG_ON(*offset_count >= max_offset);
3937                 }
3938
3939                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3940                         new_entry = true;
3941                         continue;
3942                 }
3943
3944                 ind_offset += 2;
3945
3946                 /* look for the matching indice */
3947                 for (indices = 0;
3948                         indices < *indices_count;
3949                         indices++) {
3950                         if (unique_indices[indices] ==
3951                                 register_list_format[ind_offset])
3952                                 break;
3953                 }
3954
3955                 if (indices >= *indices_count) {
3956                         unique_indices[*indices_count] =
3957                                 register_list_format[ind_offset];
3958                         indices = *indices_count;
3959                         *indices_count = *indices_count + 1;
3960                         BUG_ON(*indices_count >= max_indices);
3961                 }
3962
3963                 register_list_format[ind_offset] = indices;
3964         }
3965 }
3966
/* Build and upload the RLC save/restore lists: the direct restore list
 * goes to SRM ARAM, the (rewritten) indirect format list plus its
 * per-entry start offsets go to GPM scratch, and the unique index
 * control registers are programmed last.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parsing rewrites the index words */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* direct restore list -> SRM ARAM (auto-incrementing data port) */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is written in units of dwords divided by two */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* low 18 bits -> ADDR register, bits 20+ -> DATA */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4030
4031 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4032 {
4033         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4034 }
4035
4036 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4037 {
4038         uint32_t data;
4039
4040         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4041
4042         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4043         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4044         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4045         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4046         WREG32(mmRLC_PG_DELAY, data);
4047
4048         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4049         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4050
4051 }
4052
4053 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4054                                                 bool enable)
4055 {
4056         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4057 }
4058
4059 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4060                                                   bool enable)
4061 {
4062         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4063 }
4064
4065 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4066 {
4067         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4068 }
4069
4070 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4071 {
4072         if ((adev->asic_type == CHIP_CARRIZO) ||
4073             (adev->asic_type == CHIP_STONEY)) {
4074                 gfx_v8_0_init_csb(adev);
4075                 gfx_v8_0_init_save_restore_list(adev);
4076                 gfx_v8_0_enable_save_restore_machine(adev);
4077                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4078                 gfx_v8_0_init_power_gating(adev);
4079                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4080         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4081                    (adev->asic_type == CHIP_POLARIS12)) {
4082                 gfx_v8_0_init_csb(adev);
4083                 gfx_v8_0_init_save_restore_list(adev);
4084                 gfx_v8_0_enable_save_restore_machine(adev);
4085                 gfx_v8_0_init_power_gating(adev);
4086         }
4087
4088 }
4089
4090 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4091 {
4092         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4093
4094         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4095         gfx_v8_0_wait_for_rlc_serdes(adev);
4096 }
4097
4098 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4099 {
4100         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4101         udelay(50);
4102
4103         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4104         udelay(50);
4105 }
4106
4107 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4108 {
4109         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4110
4111         /* carrizo do enable cp interrupt after cp inited */
4112         if (!(adev->flags & AMD_IS_APU))
4113                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4114
4115         udelay(50);
4116 }
4117
4118 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4119 {
4120         const struct rlc_firmware_header_v2_0 *hdr;
4121         const __le32 *fw_data;
4122         unsigned i, fw_size;
4123
4124         if (!adev->gfx.rlc_fw)
4125                 return -EINVAL;
4126
4127         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4128         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4129
4130         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4131                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4132         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4133
4134         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4135         for (i = 0; i < fw_size; i++)
4136                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4137         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4138
4139         return 0;
4140 }
4141
/*
 * gfx_v8_0_rlc_resume - bring the RLC back up
 * @adev: amdgpu device pointer
 *
 * Stops the RLC, disables clock and power gating, resets the RLC,
 * re-initializes power gating, (re)loads the RLC microcode when the
 * driver owns firmware loading, and finally restarts the RLC.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	/* Polaris parts have a separate 3D CGCG/CGLS control; clear its
	 * low two bits (presumably the 3D CGCG/CGLS enable bits — the
	 * mask is open-coded here) as well.
	 */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just confirm it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4186
4187 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4188 {
4189         int i;
4190         u32 tmp = RREG32(mmCP_ME_CNTL);
4191
4192         if (enable) {
4193                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4194                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4195                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4196         } else {
4197                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4198                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4199                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4200                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4201                         adev->gfx.gfx_ring[i].ready = false;
4202         }
4203         WREG32(mmCP_ME_CNTL, tmp);
4204         udelay(50);
4205 }
4206
4207 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4208 {
4209         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4210         const struct gfx_firmware_header_v1_0 *ce_hdr;
4211         const struct gfx_firmware_header_v1_0 *me_hdr;
4212         const __le32 *fw_data;
4213         unsigned i, fw_size;
4214
4215         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4216                 return -EINVAL;
4217
4218         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4219                 adev->gfx.pfp_fw->data;
4220         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4221                 adev->gfx.ce_fw->data;
4222         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4223                 adev->gfx.me_fw->data;
4224
4225         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4226         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4227         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4228
4229         gfx_v8_0_cp_gfx_enable(adev, false);
4230
4231         /* PFP */
4232         fw_data = (const __le32 *)
4233                 (adev->gfx.pfp_fw->data +
4234                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4235         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4236         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4237         for (i = 0; i < fw_size; i++)
4238                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4239         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4240
4241         /* CE */
4242         fw_data = (const __le32 *)
4243                 (adev->gfx.ce_fw->data +
4244                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4245         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4246         WREG32(mmCP_CE_UCODE_ADDR, 0);
4247         for (i = 0; i < fw_size; i++)
4248                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4249         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4250
4251         /* ME */
4252         fw_data = (const __le32 *)
4253                 (adev->gfx.me_fw->data +
4254                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4255         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4256         WREG32(mmCP_ME_RAM_WADDR, 0);
4257         for (i = 0; i < fw_size; i++)
4258                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4259         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4260
4261         return 0;
4262 }
4263
4264 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4265 {
4266         u32 count = 0;
4267         const struct cs_section_def *sect = NULL;
4268         const struct cs_extent_def *ext = NULL;
4269
4270         /* begin clear state */
4271         count += 2;
4272         /* context control state */
4273         count += 3;
4274
4275         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4276                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4277                         if (sect->id == SECT_CONTEXT)
4278                                 count += 2 + ext->reg_count;
4279                         else
4280                                 return 0;
4281                 }
4282         }
4283         /* pa_sc_raster_config/pa_sc_raster_config1 */
4284         count += 4;
4285         /* end clear state */
4286         count += 2;
4287         /* clear state */
4288         count += 2;
4289
4290         return count;
4291 }
4292
/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear state
 * @adev: amdgpu device pointer
 *
 * Programs basic CP config registers, un-halts the gfx engines and
 * submits the clear-state PM4 stream (preamble, context control, the
 * VI clear-state tables, per-ASIC raster config, CE partition bases)
 * on gfx ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the raster-config SET_CONTEXT_REG packet below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every context-register extent from the VI clear-state
	 * tables as SET_CONTEXT_REG packets */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1: per-ASIC magic
	 * values (RB/SE mapping — from the hardware teams' golden
	 * settings, not derivable here) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4385 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4386 {
4387         u32 tmp;
4388         /* no gfx doorbells on iceland */
4389         if (adev->asic_type == CHIP_TOPAZ)
4390                 return;
4391
4392         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4393
4394         if (ring->use_doorbell) {
4395                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4396                                 DOORBELL_OFFSET, ring->doorbell_index);
4397                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4398                                                 DOORBELL_HIT, 0);
4399                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4400                                             DOORBELL_EN, 1);
4401         } else {
4402                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4403         }
4404
4405         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4406
4407         if (adev->flags & AMD_IS_APU)
4408                 return;
4409
4410         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4411                                         DOORBELL_RANGE_LOWER,
4412                                         AMDGPU_DOORBELL_GFX_RING0);
4413         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4414
4415         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4416                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4417 }
4418
/*
 * gfx_v8_0_cp_gfx_resume - program and start the CP gfx ring buffer
 * @adev: amdgpu device pointer
 *
 * Programs ring 0's size, rptr/wptr writeback addresses and base
 * address, configures the doorbell, emits the clear state and runs a
 * ring test.
 *
 * Returns 0 on success or the ring-test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register field is log2 of the size in
	 * units of 8 dwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before dropping RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4476
4477 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4478 {
4479         int i;
4480
4481         if (enable) {
4482                 WREG32(mmCP_MEC_CNTL, 0);
4483         } else {
4484                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4485                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4486                         adev->gfx.compute_ring[i].ready = false;
4487                 adev->gfx.kiq.ring.ready = false;
4488         }
4489         udelay(50);
4490 }
4491
4492 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4493 {
4494         const struct gfx_firmware_header_v1_0 *mec_hdr;
4495         const __le32 *fw_data;
4496         unsigned i, fw_size;
4497
4498         if (!adev->gfx.mec_fw)
4499                 return -EINVAL;
4500
4501         gfx_v8_0_cp_compute_enable(adev, false);
4502
4503         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4504         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4505
4506         fw_data = (const __le32 *)
4507                 (adev->gfx.mec_fw->data +
4508                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4509         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4510
4511         /* MEC1 */
4512         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4513         for (i = 0; i < fw_size; i++)
4514                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4515         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4516
4517         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4518         if (adev->gfx.mec2_fw) {
4519                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4520
4521                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4522                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4523
4524                 fw_data = (const __le32 *)
4525                         (adev->gfx.mec2_fw->data +
4526                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4527                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4528
4529                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4530                 for (i = 0; i < fw_size; i++)
4531                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4532                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4533         }
4534
4535         return 0;
4536 }
4537
4538 /* KIQ functions */
/*
 * gfx_v8_0_kiq_setting - tell the RLC which queue is the KIQ
 * @ring: the KIQ ring (its me/pipe/queue identify the queue slot)
 *
 * Encodes me/pipe/queue into the low byte of RLC_CP_SCHEDULERS, then
 * writes the register a second time with bit 7 set — the two-step
 * write appears to be a deliberate "select, then activate" sequence
 * (hardware-defined; confirm against the RLC programming guide).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4552
4553 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4554 {
4555         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4556         uint32_t scratch, tmp = 0;
4557         uint64_t queue_mask = 0;
4558         int r, i;
4559
4560         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4561                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4562                         continue;
4563
4564                 /* This situation may be hit in the future if a new HW
4565                  * generation exposes more than 64 queues. If so, the
4566                  * definition of queue_mask needs updating */
4567                 if (WARN_ON(i > (sizeof(queue_mask)*8))) {
4568                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4569                         break;
4570                 }
4571
4572                 queue_mask |= (1ull << i);
4573         }
4574
4575         r = amdgpu_gfx_scratch_get(adev, &scratch);
4576         if (r) {
4577                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4578                 return r;
4579         }
4580         WREG32(scratch, 0xCAFEDEAD);
4581
4582         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4583         if (r) {
4584                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4585                 amdgpu_gfx_scratch_free(adev, scratch);
4586                 return r;
4587         }
4588         /* set resources */
4589         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4590         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4591         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4592         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4593         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4594         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4595         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4596         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4597         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4598                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4599                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4600                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4601
4602                 /* map queues */
4603                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4604                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4605                 amdgpu_ring_write(kiq_ring,
4606                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4607                 amdgpu_ring_write(kiq_ring,
4608                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4609                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4610                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4611                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4612                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4613                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4614                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4615                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4616         }
4617         /* write to scratch for completion */
4618         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4619         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4620         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4621         amdgpu_ring_commit(kiq_ring);
4622
4623         for (i = 0; i < adev->usec_timeout; i++) {
4624                 tmp = RREG32(scratch);
4625                 if (tmp == 0xDEADBEEF)
4626                         break;
4627                 DRM_UDELAY(1);
4628         }
4629         if (i >= adev->usec_timeout) {
4630                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4631                           scratch, tmp);
4632                 r = -EINVAL;
4633         }
4634         amdgpu_gfx_scratch_free(adev, scratch);
4635
4636         return r;
4637 }
4638
4639 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4640 {
4641         int i, r = 0;
4642
4643         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4644                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4645                 for (i = 0; i < adev->usec_timeout; i++) {
4646                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4647                                 break;
4648                         udelay(1);
4649                 }
4650                 if (i == adev->usec_timeout)
4651                         r = -ETIMEDOUT;
4652         }
4653         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4654         WREG32(mmCP_HQD_PQ_RPTR, 0);
4655         WREG32(mmCP_HQD_PQ_WPTR, 0);
4656
4657         return r;
4658 }
4659
4660 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4661 {
4662         struct amdgpu_device *adev = ring->adev;
4663         struct vi_mqd *mqd = ring->mqd_ptr;
4664         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4665         uint32_t tmp;
4666
4667         mqd->header = 0xC0310800;
4668         mqd->compute_pipelinestat_enable = 0x00000001;
4669         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4670         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4671         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4672         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4673         mqd->compute_misc_reserved = 0x00000003;
4674         if (!(adev->flags & AMD_IS_APU)) {
4675                 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4676                                              + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4677                 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4678                                              + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4679         }
4680         eop_base_addr = ring->eop_gpu_addr >> 8;
4681         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4682         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4683
4684         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4685         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4686         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4687                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4688
4689         mqd->cp_hqd_eop_control = tmp;
4690
4691         /* enable doorbell? */
4692         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4693                             CP_HQD_PQ_DOORBELL_CONTROL,
4694                             DOORBELL_EN,
4695                             ring->use_doorbell ? 1 : 0);
4696
4697         mqd->cp_hqd_pq_doorbell_control = tmp;
4698
4699         /* set the pointer to the MQD */
4700         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4701         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4702
4703         /* set MQD vmid to 0 */
4704         tmp = RREG32(mmCP_MQD_CONTROL);
4705         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4706         mqd->cp_mqd_control = tmp;
4707
4708         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4709         hqd_gpu_addr = ring->gpu_addr >> 8;
4710         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4711         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4712
4713         /* set up the HQD, this is similar to CP_RB0_CNTL */
4714         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4715         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4716                             (order_base_2(ring->ring_size / 4) - 1));
4717         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4718                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4719 #ifdef __BIG_ENDIAN
4720         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4721 #endif
4722         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4723         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4724         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4725         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4726         mqd->cp_hqd_pq_control = tmp;
4727
4728         /* set the wb address whether it's enabled or not */
4729         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4730         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4731         mqd->cp_hqd_pq_rptr_report_addr_hi =
4732                 upper_32_bits(wb_gpu_addr) & 0xffff;
4733
4734         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4735         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4736         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4737         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4738
4739         tmp = 0;
4740         /* enable the doorbell if requested */
4741         if (ring->use_doorbell) {
4742                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4743                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4744                                 DOORBELL_OFFSET, ring->doorbell_index);
4745
4746                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4747                                          DOORBELL_EN, 1);
4748                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4749                                          DOORBELL_SOURCE, 0);
4750                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4751                                          DOORBELL_HIT, 0);
4752         }
4753
4754         mqd->cp_hqd_pq_doorbell_control = tmp;
4755
4756         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4757         ring->wptr = 0;
4758         mqd->cp_hqd_pq_wptr = ring->wptr;
4759         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4760
4761         /* set the vmid for the queue */
4762         mqd->cp_hqd_vmid = 0;
4763
4764         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4765         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4766         mqd->cp_hqd_persistent_state = tmp;
4767
4768         /* set MTYPE */
4769         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4770         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4771         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4772         mqd->cp_hqd_ib_control = tmp;
4773
4774         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4775         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4776         mqd->cp_hqd_iq_timer = tmp;
4777
4778         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4779         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4780         mqd->cp_hqd_ctx_save_control = tmp;
4781
4782         /* defaults */
4783         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4784         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4785         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4786         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4787         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4788         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4789         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4790         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4791         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4792         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4793         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4794         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4795         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4796         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4797         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4798
4799         /* activate the queue */
4800         mqd->cp_hqd_active = 1;
4801
4802         return 0;
4803 }
4804
/*
 * Write a prepared MQD image into the hardware queue descriptor (HQD)
 * registers of the currently selected queue.
 *
 * Callers in this file select the target me/pipe/queue with
 * vi_srbm_select() under srbm_mutex before calling this.
 * The MQD layout mirrors the HQD register block, so registers are
 * programmed by indexing into the MQD as an array of dwords starting
 * at cp_mqd_base_addr_lo (== mmCP_MQD_BASE_ADDR).
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        /* program the remaining registers after the EOP pointers */
        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD; CP_HQD_ACTIVE is written last in this range */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4841
/*
 * Initialize (or re-initialize after GPU reset) the kernel interface
 * queue (KIQ) MQD and commit it to the hardware.
 *
 * The KIQ's MQD backup lives at index AMDGPU_MAX_COMPUTE_RINGS in
 * adev->gfx.mec.mqd_backup[], i.e. in the slot after all compute rings.
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->gfx.in_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                /* commit the restored MQD under srbm_mutex with the
                 * KIQ's me/pipe/queue selected
                 */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                /* first-time init: build the MQD from scratch
                 * (note: "dyamic" is a typo carried by struct
                 * vi_mqd_allocation itself)
                 */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* save a pristine copy for restore after GPU reset */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4880
/*
 * Initialize a kernel compute queue (KCQ) MQD.
 *
 * Unlike the KIQ path, the MQD is only built here; it is pushed to the
 * hardware later via the KIQ (see gfx_v8_0_kiq_kcq_enable() in the
 * resume path).  The backup slot index is the ring's position in
 * adev->gfx.compute_ring[].  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
                /* first-time init: build the MQD from scratch and keep a
                 * backup copy ("dyamic" typo comes from the struct itself)
                 */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                /* resume from suspend: MQD contents are kept, only the
                 * ring buffer is cleared
                 */
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4911
/*
 * Program the MEC doorbell aperture (KIQ through MEC ring 7) and enable
 * doorbell processing.  The range registers are only written on ASICs
 * newer than Tonga.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
        if (adev->asic_type > CHIP_TONGA) {
                /* doorbell indices are in dword units, hence the << 2 */
                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
        }
        /* enable doorbells */
        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4921
/*
 * Bring up the KIQ and all kernel compute queues.
 *
 * Sequence: enable the compute CP, init+commit the KIQ MQD directly,
 * init each KCQ MQD, program the doorbell range, then map the KCQs
 * through the KIQ and ring-test everything.
 *
 * Returns 0 on success or a negative error code; a KCQ ring-test
 * failure only marks that ring not ready and does not fail the call.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        ring = &adev->gfx.kiq.ring;

        /* map the KIQ MQD BO so gfx_v8_0_kiq_init_queue() can fill it */
        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
                goto done;

        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
        if (!r) {
                r = gfx_v8_0_kiq_init_queue(ring);
                amdgpu_bo_kunmap(ring->mqd_obj);
                ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
                goto done;

        /* same map/init/unmap dance for every compute ring */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* map the KCQs onto hardware queues via KIQ packets */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KIQ */
        ring = &adev->gfx.kiq.ring;
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r) {
                ring->ready = false;
                goto done;
        }

        /* Test KCQs; failures are non-fatal, the ring is just disabled */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
4989
/*
 * Resume the command processors: load CP microcode if needed, then
 * bring up the gfx ring and the KIQ/compute queues.
 *
 * Firmware loading depends on configuration: with powerplay enabled
 * (adev->pp_enabled) loading is skipped here entirely; otherwise either
 * the driver loads the ucode directly (legacy path) or the SMU loads it
 * and we only verify completion.  Returns 0 on success or a negative
 * error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
        int r;

        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

        if (!adev->pp_enabled) {
                if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
                        /* legacy firmware loading */
                        r = gfx_v8_0_cp_gfx_load_microcode(adev);
                        if (r)
                                return r;

                        r = gfx_v8_0_cp_compute_load_microcode(adev);
                        if (r)
                                return r;
                } else {
                        /* SMU-loaded firmware: just check each CP ucode
                         * finished loading
                         */
                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_CE);
                        if (r)
                                return -EINVAL;

                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_PFP);
                        if (r)
                                return -EINVAL;

                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_ME);
                        if (r)
                                return -EINVAL;

                        /* Topaz MEC ucode is still driver-loaded */
                        if (adev->asic_type == CHIP_TOPAZ) {
                                r = gfx_v8_0_cp_compute_load_microcode(adev);
                                if (r)
                                        return r;
                        } else {
                                r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                                                 AMDGPU_UCODE_ID_CP_MEC1);
                                if (r)
                                        return -EINVAL;
                        }
                }
        }

        r = gfx_v8_0_cp_gfx_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_kiq_resume(adev);
        if (r)
                return r;

        gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        return 0;
}
5048
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
5054
/*
 * IP-block hw_init hook: program golden registers, initialize the GPU,
 * then bring up the RLC and the command processors.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* RLC must be running before the CP is resumed */
        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_cp_resume(adev);

        return r;
}
5071
/*
 * IP-block hw_fini hook: release the privileged-op interrupts and stop
 * the CP and RLC.  Under SR-IOV the host owns the hardware, so only the
 * interrupt release is done.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);

        /* undo the powergating applied in late_init */
        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
5090
/*
 * IP-block suspend hook: flag the suspend (consulted by
 * gfx_v8_0_kcq_init_queue() on resume) and tear down the hardware.
 */
static int gfx_v8_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        adev->gfx.in_suspend = true;
        return gfx_v8_0_hw_fini(adev);
}
5097
/*
 * IP-block resume hook: re-initialize the hardware and clear the
 * suspend flag set by gfx_v8_0_suspend().
 */
static int gfx_v8_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = gfx_v8_0_hw_init(adev);
        adev->gfx.in_suspend = false;
        return r;
}
5107
5108 static bool gfx_v8_0_is_idle(void *handle)
5109 {
5110         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
5112         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5113                 return false;
5114         else
5115                 return true;
5116 }
5117
5118 static int gfx_v8_0_wait_for_idle(void *handle)
5119 {
5120         unsigned i;
5121         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5122
5123         for (i = 0; i < adev->usec_timeout; i++) {
5124                 if (gfx_v8_0_is_idle(handle))
5125                         return 0;
5126
5127                 udelay(1);
5128         }
5129         return -ETIMEDOUT;
5130 }
5131
/*
 * IP-block check_soft_reset hook: inspect GRBM/SRBM status registers,
 * accumulate the GRBM/SRBM soft-reset bits that would clear the busy
 * engines, and cache them in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset hooks.  Returns true if any reset is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy gfx pipeline stage => reset CP and GFX */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC and per-CP-engine busy bits */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS: pending GRBM requests and semaphore engine */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* stash the result for the other soft-reset hooks */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5193
/*
 * IP-block pre_soft_reset hook: quiesce the engines that
 * gfx_v8_0_check_soft_reset() flagged before the actual reset.
 * Stops the RLC, disables the gfx CP if the GFX/CP blocks are to be
 * reset, and deactivates all compute HQDs plus the compute CP if any
 * CP engine is to be reset.  Always returns 0.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* nothing flagged by check_soft_reset => nothing to do */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stop the rlc */
        gfx_v8_0_rlc_stop(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                /* Disable GFX parsing/prefetching */
                gfx_v8_0_cp_gfx_enable(adev, false);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* deactivate each compute queue's HQD with its
                 * me/pipe/queue selected under srbm_mutex
                 */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                /* Disable MEC parsing/prefetching */
                gfx_v8_0_cp_compute_enable(adev, false);
        }

       return 0;
}
5235
/*
 * IP-block soft_reset hook: pulse the GRBM/SRBM soft-reset bits cached
 * by gfx_v8_0_check_soft_reset().  GMCON_DEBUG GFX_STALL/GFX_CLEAR are
 * asserted around the reset and each reset register is written, read
 * back, held ~50us, then cleared.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall and clear the GFX pipe before touching the reset bits */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                /* read back to post the write before the delay */
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                /* read back to post the write before the delay */
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the GFX stall/clear */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5297
/*
 * IP-block post_soft_reset hook: bring the engines flagged by
 * gfx_v8_0_check_soft_reset() back up after the reset — the gfx CP if
 * GFX/CP was reset, the compute HQDs and KIQ if any CP engine was
 * reset — then restart the RLC.  Always returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* make sure every HQD is inactive before re-enabling
                 * the compute queues via the KIQ
                 */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
        }
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5335
5336 /**
5337  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5338  *
5339  * @adev: amdgpu_device pointer
5340  *
5341  * Fetches a GPU clock counter snapshot.
5342  * Returns the 64 bit clock counter snapshot.
5343  */
5344 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5345 {
5346         uint64_t clock;
5347
5348         mutex_lock(&adev->gfx.gpu_clock_mutex);
5349         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5350         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5351                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5352         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5353         return clock;
5354 }
5355
/*
 * Emit WRITE_DATA packets on @ring that program the per-VMID GDS, GWS
 * and OA allocation registers.  Base/size arguments are given in bytes
 * (or resource units) and are converted to register units with the
 * AMDGPU_{GDS,GWS,OA}_SHIFT shifts; the OA register takes a bitmask of
 * the allocated OA slots.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size in the upper field, base in the lower */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: contiguous bitmask of oa_size bits starting at oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5403
5404 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5405 {
5406         WREG32(mmSQ_IND_INDEX,
5407                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5408                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5409                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5410                 (SQ_IND_INDEX__FORCE_READ_MASK));
5411         return RREG32(mmSQ_IND_DATA);
5412 }
5413
5414 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5415                            uint32_t wave, uint32_t thread,
5416                            uint32_t regno, uint32_t num, uint32_t *out)
5417 {
5418         WREG32(mmSQ_IND_INDEX,
5419                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5420                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5421                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5422                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5423                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5424                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5425         while (num--)
5426                 *(out++) = RREG32(mmSQ_IND_DATA);
5427 }
5428
/*
 * Dump the standard set of wave state registers for one wave into
 * 'dst', advancing *no_fields for each entry.  The leading 0 marks the
 * record as "type 0 wave data" for the consumer (debugfs wave reader).
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
        /* type 0 wave data */
        dst[(*no_fields)++] = 0;
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5452
5453 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5454                                      uint32_t wave, uint32_t start,
5455                                      uint32_t size, uint32_t *dst)
5456 {
5457         wave_read_regs(
5458                 adev, simd, wave, 0,
5459                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5460 }
5461
5462
/* GFX helper callbacks exposed to the rest of the driver via
 * adev->gfx.funcs (installed in gfx_v8_0_early_init()).
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5469
/* IP-block early_init: record the ring counts and install the gfx, ring,
 * irq, gds and rlc callback tables.  Runs before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5484
/* IP-block late_init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workaround (which needs the IB pool, hence
 * late init) and then request GFX powergating via powerplay.
 * Returns 0 on success or a negative errno from the failing step.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5508
5509 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5510                                                        bool enable)
5511 {
5512         if ((adev->asic_type == CHIP_POLARIS11) ||
5513             (adev->asic_type == CHIP_POLARIS12))
5514                 /* Send msg to SMU via Powerplay */
5515                 amdgpu_set_powergating_state(adev,
5516                                              AMD_IP_BLOCK_TYPE_SMC,
5517                                              enable ?
5518                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5519
5520         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5521 }
5522
5523 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5524                                                         bool enable)
5525 {
5526         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5527 }
5528
5529 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5530                 bool enable)
5531 {
5532         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5533 }
5534
5535 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5536                                           bool enable)
5537 {
5538         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5539 }
5540
5541 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5542                                                 bool enable)
5543 {
5544         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5545
5546         /* Read any GFX register to wake up GFX. */
5547         if (!enable)
5548                 RREG32(mmDB_RENDER_CONTROL);
5549 }
5550
5551 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5552                                           bool enable)
5553 {
5554         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5555                 cz_enable_gfx_cg_power_gating(adev, true);
5556                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5557                         cz_enable_gfx_pipeline_power_gating(adev, true);
5558         } else {
5559                 cz_enable_gfx_cg_power_gating(adev, false);
5560                 cz_enable_gfx_pipeline_power_gating(adev, false);
5561         }
5562 }
5563
5564 static int gfx_v8_0_set_powergating_state(void *handle,
5565                                           enum amd_powergating_state state)
5566 {
5567         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5568         bool enable = (state == AMD_PG_STATE_GATE);
5569
5570         if (amdgpu_sriov_vf(adev))
5571                 return 0;
5572
5573         switch (adev->asic_type) {
5574         case CHIP_CARRIZO:
5575         case CHIP_STONEY:
5576
5577                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5578                         cz_enable_sck_slow_down_on_power_up(adev, true);
5579                         cz_enable_sck_slow_down_on_power_down(adev, true);
5580                 } else {
5581                         cz_enable_sck_slow_down_on_power_up(adev, false);
5582                         cz_enable_sck_slow_down_on_power_down(adev, false);
5583                 }
5584                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5585                         cz_enable_cp_power_gating(adev, true);
5586                 else
5587                         cz_enable_cp_power_gating(adev, false);
5588
5589                 cz_update_gfx_cg_power_gating(adev, enable);
5590
5591                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593                 else
5594                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598                 else
5599                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600                 break;
5601         case CHIP_POLARIS11:
5602         case CHIP_POLARIS12:
5603                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5604                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5605                 else
5606                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5607
5608                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5609                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5610                 else
5611                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5612
5613                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5614                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5615                 else
5616                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5617                 break;
5618         default:
5619                 break;
5620         }
5621
5622         return 0;
5623 }
5624
/* Report the currently-active GFX clockgating features by sampling the
 * RLC/CGTS/CP gating registers and translating them into AMD_CG_* bits
 * in *flags.  NOTE(review): under SR-IOV *flags is zeroed but the
 * register reads still run — presumably harmless passthrough reads;
 * confirm against the SR-IOV register access rules.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5666
/* Broadcast a BPM serdes command (@cmd) targeting @reg_addr to every
 * CU/non-CU serdes master: select all SEs/SHs, open both master masks,
 * then compose RLC_SERDES_WR_CTRL with the command in BPM_DATA and the
 * register address in REG_ADDR.  Stoney lacks the BPM_DATA/REG_ADDR
 * fields, hence the shorter clear mask on that ASIC.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5707
/* RLC safe-mode handshake message codes plus RLC_GPR_REG2 field
 * definitions.  NOTE(review): the RLC_GPR_REG2 macros are not referenced
 * by the safe-mode code below (which drives mmRLC_SAFE_MODE instead) —
 * presumably left over from an older GPR-based handshake; confirm before
 * removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5714
/* Request RLC "safe mode" so CG/PG state can be reprogrammed without
 * racing the RLC microcontroller: write CMD|MESSAGE=1 to RLC_SAFE_MODE,
 * wait for GFX clock and power status to report up, then wait for the
 * RLC to ack by clearing CMD.  No-op when the RLC is not running or
 * neither CGCG nor MGCG is supported.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still holds RLC_CNTL contents and is
		 * reused to compose the RLC_SAFE_MODE request — presumably
		 * only the CMD/MESSAGE fields matter; confirm.
		 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status to come up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (CMD self-clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5748
/* Leave RLC safe mode: write CMD with MESSAGE=0 (exit) and wait for the
 * RLC to ack by clearing CMD.  No-op when the RLC is not running.
 * NOTE(review): the final ack-poll loop runs even when safe mode was
 * never entered — presumably a harmless read of an idle CMD bit; confirm.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5773
5774 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5775         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5776         .exit_safe_mode = iceland_exit_rlc_safe_mode
5777 };
5778
/* Enable or disable medium-grain clock gating (MGCG/MGLS and CGTS tree
 * shading).  The whole sequence runs inside RLC safe mode; the numbered
 * steps mirror the hardware programming guide order, with serdes idle
 * waits between register and BPM updates.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5882
/* Enable or disable coarse-grain clock gating (CGCG/CGLS).  Runs inside
 * RLC safe mode; on disable, the GUI idle interrupt is turned off first
 * and re-enabled at the end so powergating keeps working.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override in MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5975 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976                                             bool enable)
5977 {
5978         if (enable) {
5979                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980                  * ===  MGCG + MGLS + TS(CG/LS) ===
5981                  */
5982                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5984         } else {
5985                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986                  * ===  CGCG + CGLS ===
5987                  */
5988                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5990         }
5991         return 0;
5992 }
5993
5994 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5995                                           enum amd_clockgating_state state)
5996 {
5997         uint32_t msg_id, pp_state = 0;
5998         uint32_t pp_support_state = 0;
5999         void *pp_handle = adev->powerplay.pp_handle;
6000
6001         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6002                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6003                         pp_support_state = PP_STATE_SUPPORT_LS;
6004                         pp_state = PP_STATE_LS;
6005                 }
6006                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6007                         pp_support_state |= PP_STATE_SUPPORT_CG;
6008                         pp_state |= PP_STATE_CG;
6009                 }
6010                 if (state == AMD_CG_STATE_UNGATE)
6011                         pp_state = 0;
6012
6013                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6014                                 PP_BLOCK_GFX_CG,
6015                                 pp_support_state,
6016                                 pp_state);
6017                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6018         }
6019
6020         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6021                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6022                         pp_support_state = PP_STATE_SUPPORT_LS;
6023                         pp_state = PP_STATE_LS;
6024                 }
6025
6026                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6027                         pp_support_state |= PP_STATE_SUPPORT_CG;
6028                         pp_state |= PP_STATE_CG;
6029                 }
6030
6031                 if (state == AMD_CG_STATE_UNGATE)
6032                         pp_state = 0;
6033
6034                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6035                                 PP_BLOCK_GFX_MG,
6036                                 pp_support_state,
6037                                 pp_state);
6038                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6039         }
6040
6041         return 0;
6042 }
6043
6044 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6045                                           enum amd_clockgating_state state)
6046 {
6047
6048         uint32_t msg_id, pp_state = 0;
6049         uint32_t pp_support_state = 0;
6050         void *pp_handle = adev->powerplay.pp_handle;
6051
6052         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6053                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6054                         pp_support_state = PP_STATE_SUPPORT_LS;
6055                         pp_state = PP_STATE_LS;
6056                 }
6057                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6058                         pp_support_state |= PP_STATE_SUPPORT_CG;
6059                         pp_state |= PP_STATE_CG;
6060                 }
6061                 if (state == AMD_CG_STATE_UNGATE)
6062                         pp_state = 0;
6063
6064                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6065                                 PP_BLOCK_GFX_CG,
6066                                 pp_support_state,
6067                                 pp_state);
6068                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6069         }
6070
6071         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6072                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6073                         pp_support_state = PP_STATE_SUPPORT_LS;
6074                         pp_state = PP_STATE_LS;
6075                 }
6076                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6077                         pp_support_state |= PP_STATE_SUPPORT_CG;
6078                         pp_state |= PP_STATE_CG;
6079                 }
6080                 if (state == AMD_CG_STATE_UNGATE)
6081                         pp_state = 0;
6082
6083                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6084                                 PP_BLOCK_GFX_3D,
6085                                 pp_support_state,
6086                                 pp_state);
6087                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6088         }
6089
6090         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6091                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6092                         pp_support_state = PP_STATE_SUPPORT_LS;
6093                         pp_state = PP_STATE_LS;
6094                 }
6095
6096                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6097                         pp_support_state |= PP_STATE_SUPPORT_CG;
6098                         pp_state |= PP_STATE_CG;
6099                 }
6100
6101                 if (state == AMD_CG_STATE_UNGATE)
6102                         pp_state = 0;
6103
6104                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6105                                 PP_BLOCK_GFX_MG,
6106                                 pp_support_state,
6107                                 pp_state);
6108                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6109         }
6110
6111         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6112                 pp_support_state = PP_STATE_SUPPORT_LS;
6113
6114                 if (state == AMD_CG_STATE_UNGATE)
6115                         pp_state = 0;
6116                 else
6117                         pp_state = PP_STATE_LS;
6118
6119                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6120                                 PP_BLOCK_GFX_RLC,
6121                                 pp_support_state,
6122                                 pp_state);
6123                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6124         }
6125
6126         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6127                 pp_support_state = PP_STATE_SUPPORT_LS;
6128
6129                 if (state == AMD_CG_STATE_UNGATE)
6130                         pp_state = 0;
6131                 else
6132                         pp_state = PP_STATE_LS;
6133                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6134                         PP_BLOCK_GFX_CP,
6135                         pp_support_state,
6136                         pp_state);
6137                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6138         }
6139
6140         return 0;
6141 }
6142
6143 static int gfx_v8_0_set_clockgating_state(void *handle,
6144                                           enum amd_clockgating_state state)
6145 {
6146         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6147
6148         if (amdgpu_sriov_vf(adev))
6149                 return 0;
6150
6151         switch (adev->asic_type) {
6152         case CHIP_FIJI:
6153         case CHIP_CARRIZO:
6154         case CHIP_STONEY:
6155                 gfx_v8_0_update_gfx_clock_gating(adev,
6156                                                  state == AMD_CG_STATE_GATE);
6157                 break;
6158         case CHIP_TONGA:
6159                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6160                 break;
6161         case CHIP_POLARIS10:
6162         case CHIP_POLARIS11:
6163         case CHIP_POLARIS12:
6164                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6165                 break;
6166         default:
6167                 break;
6168         }
6169         return 0;
6170 }
6171
6172 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6173 {
6174         return ring->adev->wb.wb[ring->rptr_offs];
6175 }
6176
6177 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6178 {
6179         struct amdgpu_device *adev = ring->adev;
6180
6181         if (ring->use_doorbell)
6182                 /* XXX check if swapping is necessary on BE */
6183                 return ring->adev->wb.wb[ring->wptr_offs];
6184         else
6185                 return RREG32(mmCP_RB0_WPTR);
6186 }
6187
6188 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6189 {
6190         struct amdgpu_device *adev = ring->adev;
6191
6192         if (ring->use_doorbell) {
6193                 /* XXX check if swapping is necessary on BE */
6194                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6195                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6196         } else {
6197                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6198                 (void)RREG32(mmCP_RB0_WPTR);
6199         }
6200 }
6201
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush
 * (GPU_HDP_FLUSH_REQ) and polls GPU_HDP_FLUSH_DONE until the matching
 * done bit is set.  Compute/KIQ rings select the per-MEC, per-pipe done
 * bit; the gfx ring uses the CP0 bit and polls from the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 rings can reach this path */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask); /* reference value */
	amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6234
/*
 * Flush the VGT with two EVENT_WRITE packets: a VS partial flush
 * followed by a full VGT_FLUSH event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6245
6246
/*
 * Invalidate the HDP cache by writing 1 to HDP_DEBUG0 through a
 * confirmed WRITE_DATA packet.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0); /* register space, high address bits */
	amdgpu_ring_write(ring, 1); /* value to write */

}
6258
/*
 * Emit an indirect buffer on the gfx ring.
 *
 * CE IBs are launched with INDIRECT_BUFFER_CONST, DE IBs with the plain
 * INDIRECT_BUFFER packet.  The control dword carries the IB length in
 * dwords plus the VMID in bits 31:24.  Under SRIOV, preemptible IBs get
 * the PRE_ENB flag and DE IBs are preceded by the DE meta-data packet.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6288
/*
 * Emit an indirect buffer on a compute ring: a single INDIRECT_BUFFER
 * packet with VALID set, the IB length in dwords and the VMID in
 * bits 31:24 of the control dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6304
/*
 * Emit a gfx fence: an EVENT_WRITE_EOP that flushes TC/TCL1 caches,
 * then writes the sequence number to @addr (32 or 64 bit per
 * AMDGPU_FENCE_FLAG_64BIT) and optionally raises an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6325
/*
 * Emit a pipeline sync: a WAIT_REG_MEM on the ring's fence memory that
 * blocks until it equals the latest synced sequence number.  Gfx rings
 * poll from the PFP, compute rings from the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6342
/*
 * Emit a VM flush for @vm_id:
 *  1) write the new page-directory base address register for the context,
 *  2) request a TLB invalidate for that VM context,
 *  3) poll VM_INVALIDATE_REQUEST until the request bit clears,
 *  4) on gfx rings, sync the PFP to the ME so the PFP cannot fetch
 *     through stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* page-directory base, 4K units */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6389
6390 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6391 {
6392         return ring->adev->wb.wb[ring->wptr_offs];
6393 }
6394
6395 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6396 {
6397         struct amdgpu_device *adev = ring->adev;
6398
6399         /* XXX check if swapping is necessary on BE */
6400         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6401         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6402 }
6403
/*
 * Emit a compute-ring fence via RELEASE_MEM: flush TC/TCL1 caches,
 * write the sequence number (32 or 64 bit per AMDGPU_FENCE_FLAG_64BIT)
 * to @addr and optionally raise an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* dword-aligned low bits */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6424
/*
 * Emit a KIQ fence: write the 32-bit sequence number to @addr with
 * WRITE_DATA (64-bit fences are not supported on KIQ), then, if
 * requested, poke CPC_INT_STATUS to trigger the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6449
/* Emit a single SWITCH_BUFFER packet. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6455
/*
 * Emit a CONTEXT_CONTROL packet whose load bits depend on @flags
 * (AMDGPU_HAVE_CTX_SWITCH, AMDGPU_PREAMBLE_IB_PRESENT*).  On a context
 * switch a VGT flush is emitted first.  Under SRIOV the CE meta-data
 * packet is emitted before everything else.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6488
/*
 * Begin a COND_EXEC sequence: the GPU skips the following dwords when
 * *cond_exe_gpu_addr == 0.  The dword count is emitted as a 0x55aa55aa
 * placeholder; this returns its ring offset so that
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6501
/*
 * Patch the COND_EXEC placeholder at ring offset @offset with the
 * number of dwords emitted since gfx_v8_0_ring_emit_init_cond_exec(),
 * accounting for the ring buffer wrapping around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* last dword written before this patch */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the placeholder */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6515
/*
 * Emit a COPY_DATA packet that reads register @reg into the
 * virtualization register-read writeback slot (adev->virt.reg_val_offs).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6531
/* Emit a WRITE_DATA packet that writes @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6541
/* Enable or disable the gfx ring's EOP timestamp interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6548
6549 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6550                                                      int me, int pipe,
6551                                                      enum amdgpu_interrupt_state state)
6552 {
6553         u32 mec_int_cntl, mec_int_cntl_reg;
6554
6555         /*
6556          * amdgpu controls only the first MEC. That's why this function only
6557          * handles the setting of interrupts for this specific MEC. All other
6558          * pipes' interrupts are set by amdkfd.
6559          */
6560
6561         if (me == 1) {
6562                 switch (pipe) {
6563                 case 0:
6564                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6565                         break;
6566                 case 1:
6567                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6568                         break;
6569                 case 2:
6570                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6571                         break;
6572                 case 3:
6573                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6574                         break;
6575                 default:
6576                         DRM_DEBUG("invalid pipe %d\n", pipe);
6577                         return;
6578                 }
6579         } else {
6580                 DRM_DEBUG("invalid me %d\n", me);
6581                 return;
6582         }
6583
6584         switch (state) {
6585         case AMDGPU_IRQ_STATE_DISABLE:
6586                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6587                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6588                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6589                 break;
6590         case AMDGPU_IRQ_STATE_ENABLE:
6591                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6592                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6593                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6594                 break;
6595         default:
6596                 break;
6597         }
6598 }
6599
/* Enable or disable the privileged-register fault interrupt.  Returns 0. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6610
/* Enable or disable the privileged-instruction fault interrupt.  Returns 0. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6621
6622 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6623                                             struct amdgpu_irq_src *src,
6624                                             unsigned type,
6625                                             enum amdgpu_interrupt_state state)
6626 {
6627         switch (type) {
6628         case AMDGPU_CP_IRQ_GFX_EOP:
6629                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6630                 break;
6631         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6632                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6633                 break;
6634         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6635                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6636                 break;
6637         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6638                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6639                 break;
6640         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6641                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6642                 break;
6643         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6644                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6645                 break;
6646         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6647                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6648                 break;
6649         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6650                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6651                 break;
6652         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6653                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6654                 break;
6655         default:
6656                 break;
6657         }
6658         return 0;
6659 }
6660
6661 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6662                             struct amdgpu_irq_src *source,
6663                             struct amdgpu_iv_entry *entry)
6664 {
6665         int i;
6666         u8 me_id, pipe_id, queue_id;
6667         struct amdgpu_ring *ring;
6668
6669         DRM_DEBUG("IH: CP EOP\n");
6670         me_id = (entry->ring_id & 0x0c) >> 2;
6671         pipe_id = (entry->ring_id & 0x03) >> 0;
6672         queue_id = (entry->ring_id & 0x70) >> 4;
6673
6674         switch (me_id) {
6675         case 0:
6676                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6677                 break;
6678         case 1:
6679         case 2:
6680                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6681                         ring = &adev->gfx.compute_ring[i];
6682                         /* Per-queue interrupt is supported for MEC starting from VI.
6683                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6684                           */
6685                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6686                                 amdgpu_fence_process(ring);
6687                 }
6688                 break;
6689         }
6690         return 0;
6691 }
6692
/*
 * Privileged-register fault handler: log the violation and schedule a
 * GPU reset.  Returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6701
/*
 * Privileged-instruction fault handler: log the violation and schedule
 * a GPU reset.  Returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6710
/*
 * Enable or disable the KIQ GENERIC2 interrupt, both in CPC_INT_CNTL
 * and in the INT_CNTL register of the ME pipe the KIQ ring lives on.
 * Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is supported.  Returns 0.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* also toggle the bit in the per-pipe control register */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6739
6740 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6741                             struct amdgpu_irq_src *source,
6742                             struct amdgpu_iv_entry *entry)
6743 {
6744         u8 me_id, pipe_id, queue_id;
6745         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6746
6747         me_id = (entry->ring_id & 0x0c) >> 2;
6748         pipe_id = (entry->ring_id & 0x03) >> 0;
6749         queue_id = (entry->ring_id & 0x70) >> 4;
6750         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6751                    me_id, pipe_id, queue_id);
6752
6753         amdgpu_fence_process(ring);
6754         return 0;
6755 }
6756
/* IP-block lifecycle and power/clock-gating callbacks for gfx v8. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6777
/* Ring callbacks for the gfx (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6821
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6850
/* Ring callbacks for the KIQ (kernel interface queue) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6876
6877 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6878 {
6879         int i;
6880
6881         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6882
6883         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6884                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6885
6886         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6887                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6888 }
6889
/* End-of-pipe interrupt source: state setter and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6894
/* Privileged-register fault interrupt source: state setter and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6899
/* Privileged-instruction fault interrupt source: state setter and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6904
/* KIQ interrupt source: state setter and handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6909
6910 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6911 {
6912         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6913         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6914
6915         adev->gfx.priv_reg_irq.num_types = 1;
6916         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6917
6918         adev->gfx.priv_inst_irq.num_types = 1;
6919         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6920
6921         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6922         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6923 }
6924
/* Install the RLC callback table (shared with Iceland for all GFXv8 parts). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6929
6930 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6931 {
6932         /* init asci gds info */
6933         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6934         adev->gds.gws.total_size = 64;
6935         adev->gds.oa.total_size = 16;
6936
6937         if (adev->gds.mem.total_size == 64 * 1024) {
6938                 adev->gds.mem.gfx_partition_size = 4096;
6939                 adev->gds.mem.cs_partition_size = 4096;
6940
6941                 adev->gds.gws.gfx_partition_size = 4;
6942                 adev->gds.gws.cs_partition_size = 4;
6943
6944                 adev->gds.oa.gfx_partition_size = 4;
6945                 adev->gds.oa.cs_partition_size = 1;
6946         } else {
6947                 adev->gds.mem.gfx_partition_size = 1024;
6948                 adev->gds.mem.cs_partition_size = 1024;
6949
6950                 adev->gds.gws.gfx_partition_size = 16;
6951                 adev->gds.gws.cs_partition_size = 16;
6952
6953                 adev->gds.oa.gfx_partition_size = 4;
6954                 adev->gds.oa.cs_partition_size = 4;
6955         }
6956 }
6957
6958 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6959                                                  u32 bitmap)
6960 {
6961         u32 data;
6962
6963         if (!bitmap)
6964                 return;
6965
6966         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6967         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6968
6969         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6970 }
6971
6972 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6973 {
6974         u32 data, mask;
6975
6976         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6977                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6978
6979         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6980
6981         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6982 }
6983
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total active
 * CU count, the per-SE/SH always-on (AO) CU bitmaps, and a packed 32-bit
 * AO CU mask covering the first two SEs/SHs.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* one mask per (SE, SH): up to 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the number of always-on CUs per SH at 2 */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* grbm_idx_mutex protects the banked SE/SH register selection below */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM at this SE/SH before reading banked regs */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4 SEs x 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num of them are AO */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask is 32 bits, so only SE 0-1 / SH 0-1
			 * (8 bits each at i*16 + j*8) can be packed into it */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast (all SE/SH) selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7034
/* GFX 8.0 IP block descriptor (shares gfx_v8_0_ip_funcs with 8.1). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7043
/* GFX 8.1 IP block descriptor (same callbacks as 8.0, different minor). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7052
7053 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7054 {
7055         uint64_t ce_payload_addr;
7056         int cnt_ce;
7057         static union {
7058                 struct vi_ce_ib_state regular;
7059                 struct vi_ce_ib_state_chained_ib chained;
7060         } ce_payload = {};
7061
7062         if (ring->adev->virt.chained_ib_support) {
7063                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7064                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7065                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7066         } else {
7067                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7068                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
7069                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7070         }
7071
7072         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7073         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7074                                 WRITE_DATA_DST_SEL(8) |
7075                                 WR_CONFIRM) |
7076                                 WRITE_DATA_CACHE_POLICY(0));
7077         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7078         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7079         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7080 }
7081
7082 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7083 {
7084         uint64_t de_payload_addr, gds_addr, csa_addr;
7085         int cnt_de;
7086         static union {
7087                 struct vi_de_ib_state regular;
7088                 struct vi_de_ib_state_chained_ib chained;
7089         } de_payload = {};
7090
7091         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7092         gds_addr = csa_addr + 4096;
7093         if (ring->adev->virt.chained_ib_support) {
7094                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7095                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7096                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7097                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7098         } else {
7099                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7100                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7101                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7102                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7103         }
7104
7105         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7106         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7107                                 WRITE_DATA_DST_SEL(8) |
7108                                 WR_CONFIRM) |
7109                                 WRITE_DATA_CACHE_POLICY(0));
7110         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7111         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7112         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7113 }
This page took 0.475545 seconds and 4 git commands to generate.