/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Ring topology for GFX8: one graphics ring plus eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Validated ("golden") GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field builders for GB_TILE_MODEn / GB_MACROTILE_MODEn register values:
 * each shifts a raw field value into its bit position using the shift
 * constants from gca/gfx_8_0_sh_mask.h.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/*
 * Per-feature override bits for mmRLC_CGTT_MGCG_OVERRIDE, defined locally.
 * NOTE(review): these shadow/replace whatever the generated sh_mask header
 * provides for this register — confirm they match the RLC firmware layout.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes for BPM register writes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address: logical indices addressed over the BPM SERDES. */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Entry count of the RLC "direct register list" format in the RLC ucode. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images required per ASIC (CE, PFP, ME, MEC[,MEC2], RLC).
 * Note: Topaz and Stoney carry no MEC2 image in this list.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * GDS register offsets per VMID (0..15): one entry of
 * { memory base, memory size, GWS, OA } registers per virtual-memory ID,
 * indexed directly by VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/*
 * Golden register settings for Tonga rev A11.
 * Flat stream of { register offset, mask, value } triples — presumably
 * applied by amdgpu_program_register_sequence() (mask selects the bits to
 * update); confirm exact mask semantics in that helper.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/*
 * Common golden settings for all Tonga revisions:
 * { register, mask, value } triples (note GB_ADDR_CONFIG matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN, 0x22011003).
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/*
 * MGCG/CGCG (medium/coarse-grain clock gating) init sequence for Tonga:
 * { register, mask, value } triples.  The mmGRBM_GFX_INDEX 0xe0000000
 * writes appear to broadcast subsequent writes to all SEs/SHs/instances —
 * NOTE(review): inferred from the index value; confirm against
 * gca/gfx_8_0_sh_mask.h.  Per-CU CGTS blocks (CU0..CU7) follow.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/*
 * Golden register settings for Polaris11 rev A11:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/*
 * Common golden settings for all Polaris11 revisions:
 * { register, mask, value } triples (GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN, 0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/*
 * Golden register settings for Polaris10 rev A11:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/*
 * Common golden settings for all Polaris10 revisions:
 * { register, mask, value } triples.
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/*
 * Common golden settings for all Fiji revisions:
 * { register, mask, value } triples.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/*
 * Golden register settings for Fiji rev A10:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/*
 * MGCG/CGCG clock-gating init sequence for Fiji:
 * { register, mask, value } triples.  Unlike the Tonga/Carrizo tables this
 * one carries no per-CU CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/*
 * Golden register settings for Iceland (Topaz) rev A11:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/*
 * Common golden settings for all Iceland (Topaz) revisions:
 * { register, mask, value } triples (GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN, 0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/*
 * MGCG/CGCG clock-gating init sequence for Iceland (Topaz):
 * { register, mask, value } triples.  Per-CU CGTS entries stop at CU5
 * (smaller part than Tonga/Carrizo), and no CP_MEM_SLP_CNTL entry is
 * present at the end.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/*
 * Golden register settings for Carrizo rev A11:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/*
 * Common golden settings for all Carrizo revisions:
 * { register, mask, value } triples (GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN, 0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/*
 * MGCG/CGCG clock-gating init sequence for Carrizo:
 * { register, mask, value } triples with per-CU CGTS entries for CU0..CU7.
 * Final RLC_CGCG_CGLS_CTRL value is 0x0020003f (vs. 0x0020003c on
 * Tonga/Fiji).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/*
 * Golden register settings for Stoney rev A11:
 * { register, mask, value } triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden settings (GRBM index, raster config, GB address
 * config, SPI CU reservations); {register, mask, value} triples applied
 * via amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain / coarse-grain clockgating init values;
 * {register, mask, value} triples applied via
 * amdgpu_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
/* Forward declarations for helpers defined later in this file (ring/irq/
 * RLC function-table setup, CSB sizing, CU info, CE/DE metadata emission
 * and compute MQD software init/fini).
 */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
664
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the clockgating-init, golden-settings and golden-common register
 * sequences matching adev->asic_type.  Polaris10 additionally programs
 * CG_ACLK_CNTL via SMC and, for three specific board SKUs (matched by PCI
 * revision/subsystem IDs), issues two atombios I2C transactions —
 * presumably a board-level workaround; exact purpose not visible here.
 * Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* Polaris11 and Polaris12 share the same golden tables. */
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific I2C fixup for three known SKUs
                 * (ASUS/Sapphire/XFX by subsystem IDs).
                 */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
753
754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
755 {
756         adev->gfx.scratch.num_reg = 7;
757         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
758         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
759 }
760
761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
762 {
763         struct amdgpu_device *adev = ring->adev;
764         uint32_t scratch;
765         uint32_t tmp = 0;
766         unsigned i;
767         int r;
768
769         r = amdgpu_gfx_scratch_get(adev, &scratch);
770         if (r) {
771                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
772                 return r;
773         }
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r) {
777                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
778                           ring->idx, r);
779                 amdgpu_gfx_scratch_free(adev, scratch);
780                 return r;
781         }
782         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
783         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
784         amdgpu_ring_write(ring, 0xDEADBEEF);
785         amdgpu_ring_commit(ring);
786
787         for (i = 0; i < adev->usec_timeout; i++) {
788                 tmp = RREG32(scratch);
789                 if (tmp == 0xDEADBEEF)
790                         break;
791                 DRM_UDELAY(1);
792         }
793         if (i < adev->usec_timeout) {
794                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
795                          ring->idx, i);
796         } else {
797                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
798                           ring->idx, scratch, tmp);
799                 r = -EINVAL;
800         }
801         amdgpu_gfx_scratch_free(adev, scratch);
802         return r;
803 }
804
805 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
806 {
807         struct amdgpu_device *adev = ring->adev;
808         struct amdgpu_ib ib;
809         struct dma_fence *f = NULL;
810         uint32_t scratch;
811         uint32_t tmp = 0;
812         long r;
813
814         r = amdgpu_gfx_scratch_get(adev, &scratch);
815         if (r) {
816                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
817                 return r;
818         }
819         WREG32(scratch, 0xCAFEDEAD);
820         memset(&ib, 0, sizeof(ib));
821         r = amdgpu_ib_get(adev, NULL, 256, &ib);
822         if (r) {
823                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
824                 goto err1;
825         }
826         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
827         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
828         ib.ptr[2] = 0xDEADBEEF;
829         ib.length_dw = 3;
830
831         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
832         if (r)
833                 goto err2;
834
835         r = dma_fence_wait_timeout(f, false, timeout);
836         if (r == 0) {
837                 DRM_ERROR("amdgpu: IB test timed out.\n");
838                 r = -ETIMEDOUT;
839                 goto err2;
840         } else if (r < 0) {
841                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
842                 goto err2;
843         }
844         tmp = RREG32(scratch);
845         if (tmp == 0xDEADBEEF) {
846                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
847                 r = 0;
848         } else {
849                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
850                           scratch, tmp);
851                 r = -EINVAL;
852         }
853 err2:
854         amdgpu_ib_free(adev, &ib, NULL);
855         dma_fence_put(f);
856 err1:
857         amdgpu_gfx_scratch_free(adev, scratch);
858         return r;
859 }
860
861
862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
863         release_firmware(adev->gfx.pfp_fw);
864         adev->gfx.pfp_fw = NULL;
865         release_firmware(adev->gfx.me_fw);
866         adev->gfx.me_fw = NULL;
867         release_firmware(adev->gfx.ce_fw);
868         adev->gfx.ce_fw = NULL;
869         release_firmware(adev->gfx.rlc_fw);
870         adev->gfx.rlc_fw = NULL;
871         release_firmware(adev->gfx.mec_fw);
872         adev->gfx.mec_fw = NULL;
873         if ((adev->asic_type != CHIP_STONEY) &&
874             (adev->asic_type != CHIP_TOPAZ))
875                 release_firmware(adev->gfx.mec2_fw);
876         adev->gfx.mec2_fw = NULL;
877
878         kfree(adev->gfx.rlc.register_list_format);
879 }
880
881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
882 {
883         const char *chip_name;
884         char fw_name[30];
885         int err;
886         struct amdgpu_firmware_info *info = NULL;
887         const struct common_firmware_header *header = NULL;
888         const struct gfx_firmware_header_v1_0 *cp_hdr;
889         const struct rlc_firmware_header_v2_0 *rlc_hdr;
890         unsigned int *tmp = NULL, i;
891
892         DRM_DEBUG("\n");
893
894         switch (adev->asic_type) {
895         case CHIP_TOPAZ:
896                 chip_name = "topaz";
897                 break;
898         case CHIP_TONGA:
899                 chip_name = "tonga";
900                 break;
901         case CHIP_CARRIZO:
902                 chip_name = "carrizo";
903                 break;
904         case CHIP_FIJI:
905                 chip_name = "fiji";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS10:
911                 chip_name = "polaris10";
912                 break;
913         case CHIP_POLARIS12:
914                 chip_name = "polaris12";
915                 break;
916         case CHIP_STONEY:
917                 chip_name = "stoney";
918                 break;
919         default:
920                 BUG();
921         }
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
924         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
931         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
935         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.me_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
942         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943
944         /* chain ib ucode isn't formal released, just disable it by far
945          * TODO: when ucod ready we should use ucode version to judge if
946          * chain-ib support or not.
947          */
948         adev->virt.chained_ib_support = false;
949
950         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
951
952         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
953         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
954         if (err)
955                 goto out;
956         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
957         if (err)
958                 goto out;
959         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
960         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
961         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
962
963         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
964         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
965         if (err)
966                 goto out;
967         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
968         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
969         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
970         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
971
972         adev->gfx.rlc.save_and_restore_offset =
973                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
974         adev->gfx.rlc.clear_state_descriptor_offset =
975                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
976         adev->gfx.rlc.avail_scratch_ram_locations =
977                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
978         adev->gfx.rlc.reg_restore_list_size =
979                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
980         adev->gfx.rlc.reg_list_format_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_start);
982         adev->gfx.rlc.reg_list_format_separate_start =
983                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
984         adev->gfx.rlc.starting_offsets_start =
985                         le32_to_cpu(rlc_hdr->starting_offsets_start);
986         adev->gfx.rlc.reg_list_format_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
988         adev->gfx.rlc.reg_list_size_bytes =
989                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
990
991         adev->gfx.rlc.register_list_format =
992                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
993                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
994
995         if (!adev->gfx.rlc.register_list_format) {
996                 err = -ENOMEM;
997                 goto out;
998         }
999
1000         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1001                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1002         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1003                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1004
1005         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1006
1007         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1008                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1009         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1010                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1011
1012         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1013         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1014         if (err)
1015                 goto out;
1016         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1017         if (err)
1018                 goto out;
1019         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1020         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023         if ((adev->asic_type != CHIP_STONEY) &&
1024             (adev->asic_type != CHIP_TOPAZ)) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1027                 if (!err) {
1028                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1029                         if (err)
1030                                 goto out;
1031                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1032                                 adev->gfx.mec2_fw->data;
1033                         adev->gfx.mec2_fw_version =
1034                                 le32_to_cpu(cp_hdr->header.ucode_version);
1035                         adev->gfx.mec2_feature_version =
1036                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1037                 } else {
1038                         err = 0;
1039                         adev->gfx.mec2_fw = NULL;
1040                 }
1041         }
1042
1043         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1045                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1046                 info->fw = adev->gfx.pfp_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1053                 info->fw = adev->gfx.me_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1059                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1060                 info->fw = adev->gfx.ce_fw;
1061                 header = (const struct common_firmware_header *)info->fw->data;
1062                 adev->firmware.fw_size +=
1063                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1064
1065                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1066                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1067                 info->fw = adev->gfx.rlc_fw;
1068                 header = (const struct common_firmware_header *)info->fw->data;
1069                 adev->firmware.fw_size +=
1070                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1071
1072                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1073                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1074                 info->fw = adev->gfx.mec_fw;
1075                 header = (const struct common_firmware_header *)info->fw->data;
1076                 adev->firmware.fw_size +=
1077                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1078
1079                 /* we need account JT in */
1080                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1081                 adev->firmware.fw_size +=
1082                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1083
1084                 if (amdgpu_sriov_vf(adev)) {
1085                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1086                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1087                         info->fw = adev->gfx.mec_fw;
1088                         adev->firmware.fw_size +=
1089                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1090                 }
1091
1092                 if (adev->gfx.mec2_fw) {
1093                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1094                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1095                         info->fw = adev->gfx.mec2_fw;
1096                         header = (const struct common_firmware_header *)info->fw->data;
1097                         adev->firmware.fw_size +=
1098                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1099                 }
1100
1101         }
1102
1103 out:
1104         if (err) {
1105                 dev_err(adev->dev,
1106                         "gfx8: Failed to load firmware \"%s\"\n",
1107                         fw_name);
1108                 release_firmware(adev->gfx.pfp_fw);
1109                 adev->gfx.pfp_fw = NULL;
1110                 release_firmware(adev->gfx.me_fw);
1111                 adev->gfx.me_fw = NULL;
1112                 release_firmware(adev->gfx.ce_fw);
1113                 adev->gfx.ce_fw = NULL;
1114                 release_firmware(adev->gfx.rlc_fw);
1115                 adev->gfx.rlc_fw = NULL;
1116                 release_firmware(adev->gfx.mec_fw);
1117                 adev->gfx.mec_fw = NULL;
1118                 release_firmware(adev->gfx.mec2_fw);
1119                 adev->gfx.mec2_fw = NULL;
1120         }
1121         return err;
1122 }
1123
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state PM4 stream
 * @adev: amdgpu device pointer
 * @buffer: destination mapping; dwords are stored little-endian
 *
 * Emits, in order: a clear-state preamble-begin, a CONTEXT_CONTROL
 * packet, one SET_CONTEXT_REG run per extent of every SECT_CONTEXT
 * section in adev->gfx.rlc.cs_data, the RB raster configuration from
 * adev->gfx.config, the preamble-end marker, and a CLEAR_STATE packet.
 * Silently returns on NULL cs_data/buffer, or mid-stream on a
 * non-context section (not expected in the vi_cs_data table).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* Copy every context-register extent into the stream. */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* Raster config for SE0/SH0 from the cached RB configuration. */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1170
1171 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1172 {
1173         const __le32 *fw_data;
1174         volatile u32 *dst_ptr;
1175         int me, i, max_me = 4;
1176         u32 bo_offset = 0;
1177         u32 table_offset, table_size;
1178
1179         if (adev->asic_type == CHIP_CARRIZO)
1180                 max_me = 5;
1181
1182         /* write the cp table buffer */
1183         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1184         for (me = 0; me < max_me; me++) {
1185                 if (me == 0) {
1186                         const struct gfx_firmware_header_v1_0 *hdr =
1187                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1188                         fw_data = (const __le32 *)
1189                                 (adev->gfx.ce_fw->data +
1190                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191                         table_offset = le32_to_cpu(hdr->jt_offset);
1192                         table_size = le32_to_cpu(hdr->jt_size);
1193                 } else if (me == 1) {
1194                         const struct gfx_firmware_header_v1_0 *hdr =
1195                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1196                         fw_data = (const __le32 *)
1197                                 (adev->gfx.pfp_fw->data +
1198                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199                         table_offset = le32_to_cpu(hdr->jt_offset);
1200                         table_size = le32_to_cpu(hdr->jt_size);
1201                 } else if (me == 2) {
1202                         const struct gfx_firmware_header_v1_0 *hdr =
1203                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1204                         fw_data = (const __le32 *)
1205                                 (adev->gfx.me_fw->data +
1206                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207                         table_offset = le32_to_cpu(hdr->jt_offset);
1208                         table_size = le32_to_cpu(hdr->jt_size);
1209                 } else if (me == 3) {
1210                         const struct gfx_firmware_header_v1_0 *hdr =
1211                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212                         fw_data = (const __le32 *)
1213                                 (adev->gfx.mec_fw->data +
1214                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215                         table_offset = le32_to_cpu(hdr->jt_offset);
1216                         table_size = le32_to_cpu(hdr->jt_size);
1217                 } else  if (me == 4) {
1218                         const struct gfx_firmware_header_v1_0 *hdr =
1219                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1220                         fw_data = (const __le32 *)
1221                                 (adev->gfx.mec2_fw->data +
1222                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1223                         table_offset = le32_to_cpu(hdr->jt_offset);
1224                         table_size = le32_to_cpu(hdr->jt_size);
1225                 }
1226
1227                 for (i = 0; i < table_size; i ++) {
1228                         dst_ptr[bo_offset + i] =
1229                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1230                 }
1231
1232                 bo_offset += table_size;
1233         }
1234 }
1235
/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Unpins and frees the clear-state BO and the CP jump-table BO when
 * present.  Each BO is reserved before unpinning; a failed reserve only
 * produces a warning and teardown continues regardless.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        int r;

        /* clear state block */
        if (adev->gfx.rlc.clear_state_obj) {
                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
                adev->gfx.rlc.clear_state_obj = NULL;
        }

        /* jump table block */
        if (adev->gfx.rlc.cp_table_obj) {
                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
                adev->gfx.rlc.cp_table_obj = NULL;
        }
}
1262
1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1264 {
1265         volatile u32 *dst_ptr;
1266         u32 dws;
1267         const struct cs_section_def *cs_data;
1268         int r;
1269
1270         adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272         cs_data = adev->gfx.rlc.cs_data;
1273
1274         if (cs_data) {
1275                 /* clear state block */
1276                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1277
1278                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1279                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1280                                              AMDGPU_GEM_DOMAIN_VRAM,
1281                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1282                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1283                                              NULL, NULL,
1284                                              &adev->gfx.rlc.clear_state_obj);
1285                         if (r) {
1286                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1287                                 gfx_v8_0_rlc_fini(adev);
1288                                 return r;
1289                         }
1290                 }
1291                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1292                 if (unlikely(r != 0)) {
1293                         gfx_v8_0_rlc_fini(adev);
1294                         return r;
1295                 }
1296                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1297                                   &adev->gfx.rlc.clear_state_gpu_addr);
1298                 if (r) {
1299                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1300                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304
1305                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1306                 if (r) {
1307                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1308                         gfx_v8_0_rlc_fini(adev);
1309                         return r;
1310                 }
1311                 /* set up the cs buffer */
1312                 dst_ptr = adev->gfx.rlc.cs_ptr;
1313                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1314                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1315                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1316         }
1317
1318         if ((adev->asic_type == CHIP_CARRIZO) ||
1319             (adev->asic_type == CHIP_STONEY)) {
1320                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1321                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1322                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1323                                              AMDGPU_GEM_DOMAIN_VRAM,
1324                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1325                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1326                                              NULL, NULL,
1327                                              &adev->gfx.rlc.cp_table_obj);
1328                         if (r) {
1329                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1330                                 return r;
1331                         }
1332                 }
1333
1334                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1335                 if (unlikely(r != 0)) {
1336                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1340                                   &adev->gfx.rlc.cp_table_gpu_addr);
1341                 if (r) {
1342                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1343                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1344                         return r;
1345                 }
1346                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object
 *
 * Unpins, unreserves and drops the reference on the HPD EOP bo created
 * by gfx_v8_0_mec_init().  A failed reservation is only warned about;
 * the teardown continues regardless.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
1375
1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377                                   struct amdgpu_ring *ring,
1378                                   struct amdgpu_irq_src *irq)
1379 {
1380         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1381         int r = 0;
1382
1383         r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1384         if (r)
1385                 return r;
1386
1387         ring->adev = NULL;
1388         ring->ring_obj = NULL;
1389         ring->use_doorbell = true;
1390         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1391         if (adev->gfx.mec2_fw) {
1392                 ring->me = 2;
1393                 ring->pipe = 0;
1394         } else {
1395                 ring->me = 1;
1396                 ring->pipe = 1;
1397         }
1398
1399         ring->queue = 0;
1400         ring->eop_gpu_addr = kiq->eop_gpu_addr;
1401         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1402         r = amdgpu_ring_init(adev, ring, 1024,
1403                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1404         if (r)
1405                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1406
1407         return r;
1408 }
/*
 * gfx_v8_0_kiq_free_ring - tear down the KIQ ring
 *
 * Releases the register read-back writeback slot allocated in
 * gfx_v8_0_kiq_init_ring() and finalizes the ring.
 */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}
1415
1416 #define MEC_HPD_SIZE 2048
1417
/*
 * gfx_v8_0_mec_init - allocate and clear the MEC HPD EOP buffer
 *
 * Creates one MEC_HPD_SIZE-byte EOP area per compute queue in GTT,
 * pins it and zeroes it.  Only a single MEC with a single pipe is set
 * up here; the remaining pipes are left for the KFD (see comment
 * below).
 *
 * NOTE(review): the dev_warn strings say "HDP EOP" while the object is
 * the HPD EOP bo (hpd_eop_obj) — looks like a typo in the messages;
 * left untouched here since they are runtime strings.
 *
 * Returns 0 on success or a negative error code (resources already
 * created are released via gfx_v8_0_mec_fini() on failure).
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		/* NOTE(review): the bo is still reserved here, so the
		 * re-reserve inside gfx_v8_0_mec_fini() cannot take the
		 * lock again — worth confirming/unreserving first */
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* zero the whole EOP area so the CP starts from a clean state */
	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1469
/*
 * gfx_v8_0_kiq_fini - free the KIQ EOP buffer object
 *
 * Counterpart to gfx_v8_0_kiq_init(); releases the kernel bo (no CPU
 * pointer is kept, hence the NULL cpu_addr argument).
 */
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
1476
/*
 * gfx_v8_0_kiq_init - allocate and clear the KIQ EOP buffer
 *
 * Creates a pinned, kernel-mapped MEC_HPD_SIZE bo in GTT, zeroes it and
 * then drops the CPU mapping again (only the GPU address is kept in
 * kiq->eop_gpu_addr).
 *
 * Returns 0 on success or the amdgpu_bo_create_kernel() error.
 */
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	/* reserve only to drop the CPU mapping; a reservation failure
	 * is warned about and the unmap is attempted anyway (best
	 * effort) */
	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}
1501
/*
 * GFX8 compute shader binary (raw machine code) loaded by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs.
 * Presumably a sequence of VGPR move instructions ending in a
 * barrier/end-of-program pair — TODO confirm against the GCN3 ISA.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1538
/*
 * GFX8 compute shader binary (raw machine code) loaded by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPRs; the same
 * shader is dispatched twice (SGPR1/SGPR2 passes) with different
 * STATIC_THREAD_MGMT masks.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1563
/*
 * { register offset, value } pairs, consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds() and emitted as SET_SH_REG packets
 * before dispatching the VGPR init shader.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1583
/*
 * { register offset, value } pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); differs from sgpr2_init_regs only
 * in the STATIC_THREAD_MGMT_SE0 CU mask (low nibble here).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1603
/*
 * { register offset, value } pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); same as sgpr1_init_regs except the
 * STATIC_THREAD_MGMT_SE0 CU mask covers the high nibble (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1623
/*
 * EDC SEC/DED counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the error counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1652
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs for EDC on Carrizo
 *
 * Builds a single indirect buffer that performs three compute
 * dispatches (one VGPR init pass and two SGPR init passes), submits it
 * on the first compute ring and waits for completion, then enables
 * EDC DED/FED propagation in GB_EDC_MODE and clears the SEC/DED error
 * counters by reading them back.  No-op on non-Carrizo parts or when
 * the compute ring is not ready.
 *
 * Returns 0 on success or the submit/fence-wait error.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable it while we set up */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg/value pair (SET_SH_REG), plus
	 * 4 for the PGM_LO/HI write, 5 for the dispatch packet and 2
	 * for the CS partial flush — converted to bytes (* 4) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore the saved EDC mode with DED reporting and FED
	 * propagation enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" looks redundant after
	 * clearing DIS_EDC — confirm against the register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1815
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         u32 mc_shared_chmap, mc_arb_ramcfg;
1820         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821         u32 tmp;
1822         int ret;
1823
1824         switch (adev->asic_type) {
1825         case CHIP_TOPAZ:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_cu_per_sh = 6;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831                 adev->gfx.config.max_texture_channel_caches = 2;
1832                 adev->gfx.config.max_gprs = 256;
1833                 adev->gfx.config.max_gs_threads = 32;
1834                 adev->gfx.config.max_hw_contexts = 8;
1835
1836                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841                 break;
1842         case CHIP_FIJI:
1843                 adev->gfx.config.max_shader_engines = 4;
1844                 adev->gfx.config.max_tile_pipes = 16;
1845                 adev->gfx.config.max_cu_per_sh = 16;
1846                 adev->gfx.config.max_sh_per_se = 1;
1847                 adev->gfx.config.max_backends_per_se = 4;
1848                 adev->gfx.config.max_texture_channel_caches = 16;
1849                 adev->gfx.config.max_gprs = 256;
1850                 adev->gfx.config.max_gs_threads = 32;
1851                 adev->gfx.config.max_hw_contexts = 8;
1852
1853                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858                 break;
1859         case CHIP_POLARIS11:
1860         case CHIP_POLARIS12:
1861                 ret = amdgpu_atombios_get_gfx_info(adev);
1862                 if (ret)
1863                         return ret;
1864                 adev->gfx.config.max_gprs = 256;
1865                 adev->gfx.config.max_gs_threads = 32;
1866                 adev->gfx.config.max_hw_contexts = 8;
1867
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873                 break;
1874         case CHIP_POLARIS10:
1875                 ret = amdgpu_atombios_get_gfx_info(adev);
1876                 if (ret)
1877                         return ret;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_TONGA:
1889                 adev->gfx.config.max_shader_engines = 4;
1890                 adev->gfx.config.max_tile_pipes = 8;
1891                 adev->gfx.config.max_cu_per_sh = 8;
1892                 adev->gfx.config.max_sh_per_se = 1;
1893                 adev->gfx.config.max_backends_per_se = 2;
1894                 adev->gfx.config.max_texture_channel_caches = 8;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 32;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         case CHIP_CARRIZO:
1906                 adev->gfx.config.max_shader_engines = 1;
1907                 adev->gfx.config.max_tile_pipes = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910
1911                 switch (adev->pdev->revision) {
1912                 case 0xc4:
1913                 case 0x84:
1914                 case 0xc8:
1915                 case 0xcc:
1916                 case 0xe1:
1917                 case 0xe3:
1918                         /* B10 */
1919                         adev->gfx.config.max_cu_per_sh = 8;
1920                         break;
1921                 case 0xc5:
1922                 case 0x81:
1923                 case 0x85:
1924                 case 0xc9:
1925                 case 0xcd:
1926                 case 0xe2:
1927                 case 0xe4:
1928                         /* B8 */
1929                         adev->gfx.config.max_cu_per_sh = 6;
1930                         break;
1931                 case 0xc6:
1932                 case 0xca:
1933                 case 0xce:
1934                 case 0x88:
1935                 case 0xe6:
1936                         /* B6 */
1937                         adev->gfx.config.max_cu_per_sh = 6;
1938                         break;
1939                 case 0xc7:
1940                 case 0x87:
1941                 case 0xcb:
1942                 case 0xe5:
1943                 case 0x89:
1944                 default:
1945                         /* B4 */
1946                         adev->gfx.config.max_cu_per_sh = 4;
1947                         break;
1948                 }
1949
1950                 adev->gfx.config.max_texture_channel_caches = 2;
1951                 adev->gfx.config.max_gprs = 256;
1952                 adev->gfx.config.max_gs_threads = 32;
1953                 adev->gfx.config.max_hw_contexts = 8;
1954
1955                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1956                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1957                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1958                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1959                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1960                 break;
1961         case CHIP_STONEY:
1962                 adev->gfx.config.max_shader_engines = 1;
1963                 adev->gfx.config.max_tile_pipes = 2;
1964                 adev->gfx.config.max_sh_per_se = 1;
1965                 adev->gfx.config.max_backends_per_se = 1;
1966
1967                 switch (adev->pdev->revision) {
1968                 case 0x80:
1969                 case 0x81:
1970                 case 0xc0:
1971                 case 0xc1:
1972                 case 0xc2:
1973                 case 0xc4:
1974                 case 0xc8:
1975                 case 0xc9:
1976                 case 0xd6:
1977                 case 0xda:
1978                 case 0xe9:
1979                 case 0xea:
1980                         adev->gfx.config.max_cu_per_sh = 3;
1981                         break;
1982                 case 0x83:
1983                 case 0xd0:
1984                 case 0xd1:
1985                 case 0xd2:
1986                 case 0xd4:
1987                 case 0xdb:
1988                 case 0xe1:
1989                 case 0xe2:
1990                 default:
1991                         adev->gfx.config.max_cu_per_sh = 2;
1992                         break;
1993                 }
1994
1995                 adev->gfx.config.max_texture_channel_caches = 2;
1996                 adev->gfx.config.max_gprs = 256;
1997                 adev->gfx.config.max_gs_threads = 16;
1998                 adev->gfx.config.max_hw_contexts = 8;
1999
2000                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2001                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2002                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2003                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2004                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
2005                 break;
2006         default:
2007                 adev->gfx.config.max_shader_engines = 2;
2008                 adev->gfx.config.max_tile_pipes = 4;
2009                 adev->gfx.config.max_cu_per_sh = 2;
2010                 adev->gfx.config.max_sh_per_se = 1;
2011                 adev->gfx.config.max_backends_per_se = 2;
2012                 adev->gfx.config.max_texture_channel_caches = 4;
2013                 adev->gfx.config.max_gprs = 256;
2014                 adev->gfx.config.max_gs_threads = 32;
2015                 adev->gfx.config.max_hw_contexts = 8;
2016
2017                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2018                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2019                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2020                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2021                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2022                 break;
2023         }
2024
2025         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2026         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2027         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2028
2029         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2030         adev->gfx.config.mem_max_burst_length_bytes = 256;
2031         if (adev->flags & AMD_IS_APU) {
2032                 /* Get memory bank mapping mode. */
2033                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2034                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2035                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2036
2037                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2038                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2039                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2040
2041                 /* Validate settings in case only one DIMM installed. */
2042                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2043                         dimm00_addr_map = 0;
2044                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2045                         dimm01_addr_map = 0;
2046                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2047                         dimm10_addr_map = 0;
2048                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2049                         dimm11_addr_map = 0;
2050
2051                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
2053                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2054                         adev->gfx.config.mem_row_size_in_kb = 2;
2055                 else
2056                         adev->gfx.config.mem_row_size_in_kb = 1;
2057         } else {
2058                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2059                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2060                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2061                         adev->gfx.config.mem_row_size_in_kb = 4;
2062         }
2063
2064         adev->gfx.config.shader_engine_tile_size = 32;
2065         adev->gfx.config.num_gpus = 1;
2066         adev->gfx.config.multi_gpu_tile_size = 64;
2067
2068         /* fix up row size */
2069         switch (adev->gfx.config.mem_row_size_in_kb) {
2070         case 1:
2071         default:
2072                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2073                 break;
2074         case 2:
2075                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2076                 break;
2077         case 4:
2078                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2079                 break;
2080         }
2081         adev->gfx.config.gb_addr_config = gb_addr_config;
2082
2083         return 0;
2084 }
2085
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * Registers the interrupt sources used by the block, loads the GFX
 * microcode, allocates the RLC and MEC buffer objects, initializes the
 * gfx and compute rings (plus the KIQ ring and per-queue MQDs when
 * running as an SR-IOV VF), reserves the GDS/GWS/OA partitions for gfx
 * and finally runs the early hardware configuration.
 *
 * @handle: amdgpu_device pointer (passed as void * by the IP block
 *          framework)
 *
 * Returns 0 on success or a negative error code; on error, resources
 * already acquired are released by the caller via the sw_fini path.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event (legacy IH source id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP (end-of-pipe) event (legacy IH source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg access fault (legacy IH source id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst fault (legacy IH source id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Microcode must be loaded before the RLC/MEC BOs are set up below. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8; /* 8 queues per pipe */
		ring->queue = i % 8;
		/* each queue gets its own slice of the HPD EOP buffer */
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000; /* 32 KiB of CE (constant engine) RAM */

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2225
2226 static int gfx_v8_0_sw_fini(void *handle)
2227 {
2228         int i;
2229         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2230
2231         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2232         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2233         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2234
2235         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2236                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2237         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2238                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2239
2240         if (amdgpu_sriov_vf(adev)) {
2241                 gfx_v8_0_compute_mqd_sw_fini(adev);
2242                 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2243                 gfx_v8_0_kiq_fini(adev);
2244         }
2245
2246         gfx_v8_0_mec_fini(adev);
2247         gfx_v8_0_rlc_fini(adev);
2248         gfx_v8_0_free_microcode(adev);
2249
2250         return 0;
2251 }
2252
2253 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2254 {
2255         uint32_t *modearray, *mod2array;
2256         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2257         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2258         u32 reg_offset;
2259
2260         modearray = adev->gfx.config.tile_mode_array;
2261         mod2array = adev->gfx.config.macrotile_mode_array;
2262
2263         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2264                 modearray[reg_offset] = 0;
2265
2266         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2267                 mod2array[reg_offset] = 0;
2268
2269         switch (adev->asic_type) {
2270         case CHIP_TOPAZ:
2271                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                 PIPE_CONFIG(ADDR_SURF_P2) |
2273                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                 PIPE_CONFIG(ADDR_SURF_P2) |
2281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284                                 PIPE_CONFIG(ADDR_SURF_P2) |
2285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288                                 PIPE_CONFIG(ADDR_SURF_P2) |
2289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2292                                 PIPE_CONFIG(ADDR_SURF_P2) |
2293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296                                 PIPE_CONFIG(ADDR_SURF_P2) |
2297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2300                                 PIPE_CONFIG(ADDR_SURF_P2));
2301                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                 PIPE_CONFIG(ADDR_SURF_P2) |
2303                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2304                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2313                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2330                                  PIPE_CONFIG(ADDR_SURF_P2) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2334                                  PIPE_CONFIG(ADDR_SURF_P2) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2338                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2342                                  PIPE_CONFIG(ADDR_SURF_P2) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2346                                  PIPE_CONFIG(ADDR_SURF_P2) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2350                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2354                                  PIPE_CONFIG(ADDR_SURF_P2) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2358                                  PIPE_CONFIG(ADDR_SURF_P2) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P2) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P2) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P2) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373
2374                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2375                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2376                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2377                                 NUM_BANKS(ADDR_SURF_8_BANK));
2378                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                 NUM_BANKS(ADDR_SURF_8_BANK));
2382                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                 NUM_BANKS(ADDR_SURF_8_BANK));
2386                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2389                                 NUM_BANKS(ADDR_SURF_8_BANK));
2390                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2393                                 NUM_BANKS(ADDR_SURF_8_BANK));
2394                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2397                                 NUM_BANKS(ADDR_SURF_8_BANK));
2398                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401                                 NUM_BANKS(ADDR_SURF_8_BANK));
2402                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405                                 NUM_BANKS(ADDR_SURF_16_BANK));
2406                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409                                 NUM_BANKS(ADDR_SURF_16_BANK));
2410                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2411                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413                                  NUM_BANKS(ADDR_SURF_16_BANK));
2414                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2415                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417                                  NUM_BANKS(ADDR_SURF_16_BANK));
2418                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2420                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2421                                  NUM_BANKS(ADDR_SURF_16_BANK));
2422                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2425                                  NUM_BANKS(ADDR_SURF_16_BANK));
2426                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2429                                  NUM_BANKS(ADDR_SURF_8_BANK));
2430
2431                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2432                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2433                             reg_offset != 23)
2434                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2435
2436                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2437                         if (reg_offset != 7)
2438                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2439
2440                 break;
2441         case CHIP_FIJI:
2442                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2449                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2463                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2471                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2473                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2474                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2475                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2476                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2488                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2489                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2492                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2501                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2512                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/*
		 * NOTE(review): tail of the preceding chip case — its label is
		 * above this hunk.  All non-PRT entries use
		 * PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16), a 16-pipe layout
		 * (presumably Fiji; confirm against the case label).  The PRT
		 * fallback entries (23, 30) drop to ADDR_SURF_P4_16x16.
		 */
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank geometry) table.  Index 7 is never
		 * initialized here; the write loop below explicitly skips
		 * reg_offset 7, so GB_MACROTILE_MODE7 is left untouched.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush both tables into the GB_TILE_MODE / GB_MACROTILE_MODE
		 * register banks; offset 7 of the macrotile bank is skipped
		 * (mod2array[7] was intentionally never set above).
		 */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 values for Tonga.  Every entry uses
		 * PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) except the PRT
		 * fallback entries (7, 12, 17, 23, 30), which use the 4-pipe
		 * ADDR_SURF_P4_16x16 config.  Entries 0-7 are the depth modes
		 * with increasing TILE_SPLIT, entry 8 is linear-aligned,
		 * 9-12 display, 13-17 thin, 18-26 thick, 27-30 rotated.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 (bank geometry) values.  Index 7 is
		 * intentionally never initialized; the write loop below skips
		 * reg_offset 7, leaving GB_MACROTILE_MODE7 untouched.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode and macrotile-mode register banks
		 * (macrotile offset 7 skipped, matching the table above).
		 */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * GB_TILE_MODE0..30 values shared by Polaris11 and Polaris12.
		 * Every entry uses the 4-pipe PIPE_CONFIG(ADDR_SURF_P4_16x16),
		 * so (unlike the 8/16-pipe chips) the PRT fallback entries are
		 * identical to their primary PRT counterparts.  Entries 0-7
		 * are depth modes with increasing TILE_SPLIT, 8 is
		 * linear-aligned, 9-12 display, 13-17 thin, 18-26 thick,
		 * 27-30 rotated.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank geometry) table.  Index 7 is left unset,
		 * matching the other chip cases whose write loops skip
		 * macrotile register offset 7.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

2999                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3007                                 NUM_BANKS(ADDR_SURF_8_BANK));
3008
3009                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3011                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3012                                 NUM_BANKS(ADDR_SURF_4_BANK));
3013
3014                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3015                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3016
3017                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3018                         if (reg_offset != 7)
3019                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3020
3021                 break;
3022         case CHIP_POLARIS10:
                /*
                 * Tiling tables for Polaris10 (dGPU).  Entries use the 8-pipe
                 * ADDR_SURF_P8_32x32_16x16 pipe config, with a P4_16x16
                 * variant for the PRT (partially-resident texture) tile
                 * modes (indices 7, 12, 17, 23, 30).
                 */
3023                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3030                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3034                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3045                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3052                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3053                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3056                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3057                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3061                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3069                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3073                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3089                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3093                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3102                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3106                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3114                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3118                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3120                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3122                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3124                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3126                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3128                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3138                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3141                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3144                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145
                /*
                 * Macro-tile (bank layout) table.  Index 7 is never
                 * assigned and the write loop below skips it --
                 * presumably a reserved entry (TODO confirm against the
                 * register spec); the other ASIC cases in this switch
                 * skip it the same way.
                 */
3146                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3148                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149                                 NUM_BANKS(ADDR_SURF_16_BANK));
3150
3151                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3154                                 NUM_BANKS(ADDR_SURF_16_BANK));
3155
3156                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3158                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3159                                 NUM_BANKS(ADDR_SURF_16_BANK));
3160
3161                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3164                                 NUM_BANKS(ADDR_SURF_16_BANK));
3165
3166                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3168                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3169                                 NUM_BANKS(ADDR_SURF_16_BANK));
3170
3171                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3174                                 NUM_BANKS(ADDR_SURF_16_BANK));
3175
3176                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3179                                 NUM_BANKS(ADDR_SURF_16_BANK));
3180
3181                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3182                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3183                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3184                                 NUM_BANKS(ADDR_SURF_16_BANK));
3185
3186                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3188                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189                                 NUM_BANKS(ADDR_SURF_16_BANK));
3190
3191                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3193                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194                                 NUM_BANKS(ADDR_SURF_16_BANK));
3195
3196                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                                 NUM_BANKS(ADDR_SURF_16_BANK));
3200
3201                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3203                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3204                                 NUM_BANKS(ADDR_SURF_8_BANK));
3205
3206                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3209                                 NUM_BANKS(ADDR_SURF_4_BANK));
3210
3211                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3214                                 NUM_BANKS(ADDR_SURF_4_BANK));
3215
                /* Commit both tables to the GB_TILE_MODEn and
                 * GB_MACROTILE_MODEn registers (macrotile entry 7 skipped). */
3216                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3217                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3218
3219                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3220                         if (reg_offset != 7)
3221                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3222
3223                 break;
3224         case CHIP_STONEY:
                /*
                 * Tiling tables for Stoney (small APU).  Every entry uses
                 * the 2-pipe ADDR_SURF_P2 pipe config.  modearray indices
                 * 7, 12, 17 and 23 are deliberately left unassigned here
                 * and the write loop below skips those register offsets.
                 */
3225                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2) |
3227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234                                 PIPE_CONFIG(ADDR_SURF_P2) |
3235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3238                                 PIPE_CONFIG(ADDR_SURF_P2) |
3239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2) |
3243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3246                                 PIPE_CONFIG(ADDR_SURF_P2) |
3247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3250                                 PIPE_CONFIG(ADDR_SURF_P2) |
3251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3254                                 PIPE_CONFIG(ADDR_SURF_P2));
3255                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3256                                 PIPE_CONFIG(ADDR_SURF_P2) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3258                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3267                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3271                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3283                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3327
                /* Macro-tile (bank layout) table; index 7 is left unset
                 * and skipped when the registers are written below. */
3328                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                 NUM_BANKS(ADDR_SURF_8_BANK));
3332                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335                                 NUM_BANKS(ADDR_SURF_8_BANK));
3336                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339                                 NUM_BANKS(ADDR_SURF_8_BANK));
3340                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                 NUM_BANKS(ADDR_SURF_8_BANK));
3344                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3347                                 NUM_BANKS(ADDR_SURF_8_BANK));
3348                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3351                                 NUM_BANKS(ADDR_SURF_8_BANK));
3352                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                 NUM_BANKS(ADDR_SURF_8_BANK));
3356                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359                                 NUM_BANKS(ADDR_SURF_16_BANK));
3360                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363                                 NUM_BANKS(ADDR_SURF_16_BANK));
3364                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371                                  NUM_BANKS(ADDR_SURF_16_BANK));
3372                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                  NUM_BANKS(ADDR_SURF_16_BANK));
3376                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                  NUM_BANKS(ADDR_SURF_8_BANK));
3384
                /* Commit the tables; skip the tile-mode indices left
                 * unprogrammed above and macrotile entry 7. */
3385                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3386                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3387                             reg_offset != 23)
3388                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3389
3390                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3391                         if (reg_offset != 7)
3392                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3393
3394                 break;
3395         default:
3396                 dev_warn(adev->dev,
3397                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3398                          adev->asic_type);
3399
3400         case CHIP_CARRIZO:
3401                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2) |
3403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410                                 PIPE_CONFIG(ADDR_SURF_P2) |
3411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3414                                 PIPE_CONFIG(ADDR_SURF_P2) |
3415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3418                                 PIPE_CONFIG(ADDR_SURF_P2) |
3419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3421                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3422                                 PIPE_CONFIG(ADDR_SURF_P2) |
3423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3425                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3426                                 PIPE_CONFIG(ADDR_SURF_P2) |
3427                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3428                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3429                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3430                                 PIPE_CONFIG(ADDR_SURF_P2));
3431                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3432                                 PIPE_CONFIG(ADDR_SURF_P2) |
3433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3434                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3440                                  PIPE_CONFIG(ADDR_SURF_P2) |
3441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3443                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3444                                  PIPE_CONFIG(ADDR_SURF_P2) |
3445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3447                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3448                                  PIPE_CONFIG(ADDR_SURF_P2) |
3449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3451                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3452                                  PIPE_CONFIG(ADDR_SURF_P2) |
3453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3455                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3456                                  PIPE_CONFIG(ADDR_SURF_P2) |
3457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3459                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3460                                  PIPE_CONFIG(ADDR_SURF_P2) |
3461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3464                                  PIPE_CONFIG(ADDR_SURF_P2) |
3465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3468                                  PIPE_CONFIG(ADDR_SURF_P2) |
3469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3472                                  PIPE_CONFIG(ADDR_SURF_P2) |
3473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3476                                  PIPE_CONFIG(ADDR_SURF_P2) |
3477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3480                                  PIPE_CONFIG(ADDR_SURF_P2) |
3481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3483                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3484                                  PIPE_CONFIG(ADDR_SURF_P2) |
3485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3487                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3488                                  PIPE_CONFIG(ADDR_SURF_P2) |
3489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3491                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3492                                  PIPE_CONFIG(ADDR_SURF_P2) |
3493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3495                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3496                                  PIPE_CONFIG(ADDR_SURF_P2) |
3497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3499                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3500                                  PIPE_CONFIG(ADDR_SURF_P2) |
3501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3503
3504                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507                                 NUM_BANKS(ADDR_SURF_8_BANK));
3508                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511                                 NUM_BANKS(ADDR_SURF_8_BANK));
3512                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515                                 NUM_BANKS(ADDR_SURF_8_BANK));
3516                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519                                 NUM_BANKS(ADDR_SURF_8_BANK));
3520                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3523                                 NUM_BANKS(ADDR_SURF_8_BANK));
3524                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3527                                 NUM_BANKS(ADDR_SURF_8_BANK));
3528                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531                                 NUM_BANKS(ADDR_SURF_8_BANK));
3532                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535                                 NUM_BANKS(ADDR_SURF_16_BANK));
3536                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539                                 NUM_BANKS(ADDR_SURF_16_BANK));
3540                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3541                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3542                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543                                  NUM_BANKS(ADDR_SURF_16_BANK));
3544                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3545                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3546                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3547                                  NUM_BANKS(ADDR_SURF_16_BANK));
3548                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3549                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3550                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3551                                  NUM_BANKS(ADDR_SURF_16_BANK));
3552                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3555                                  NUM_BANKS(ADDR_SURF_16_BANK));
3556                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3559                                  NUM_BANKS(ADDR_SURF_8_BANK));
3560
3561                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3562                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3563                             reg_offset != 23)
3564                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3565
3566                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3567                         if (reg_offset != 7)
3568                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3569
3570                 break;
3571         }
3572 }
3573
3574 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3575                                   u32 se_num, u32 sh_num, u32 instance)
3576 {
3577         u32 data;
3578
3579         if (instance == 0xffffffff)
3580                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3581         else
3582                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3583
3584         if (se_num == 0xffffffff)
3585                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3586         else
3587                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3588
3589         if (sh_num == 0xffffffff)
3590                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3591         else
3592                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3593
3594         WREG32(mmGRBM_GFX_INDEX, data);
3595 }
3596
3597 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3598 {
3599         return (u32)((1ULL << bit_width) - 1);
3600 }
3601
3602 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3603 {
3604         u32 data, mask;
3605
3606         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3607                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3608
3609         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3610
3611         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3612                                        adev->gfx.config.max_sh_per_se);
3613
3614         return (~data) & mask;
3615 }
3616
3617 static void
3618 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3619 {
3620         switch (adev->asic_type) {
3621         case CHIP_FIJI:
3622                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3623                           RB_XSEL2(1) | PKR_MAP(2) |
3624                           PKR_XSEL(1) | PKR_YSEL(1) |
3625                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3626                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3627                            SE_PAIR_YSEL(2);
3628                 break;
3629         case CHIP_TONGA:
3630         case CHIP_POLARIS10:
3631                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3632                           SE_XSEL(1) | SE_YSEL(1);
3633                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3634                            SE_PAIR_YSEL(2);
3635                 break;
3636         case CHIP_TOPAZ:
3637         case CHIP_CARRIZO:
3638                 *rconf |= RB_MAP_PKR0(2);
3639                 *rconf1 |= 0x0;
3640                 break;
3641         case CHIP_POLARIS11:
3642         case CHIP_POLARIS12:
3643                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3644                           SE_XSEL(1) | SE_YSEL(1);
3645                 *rconf1 |= 0x0;
3646                 break;
3647         case CHIP_STONEY:
3648                 *rconf |= 0x0;
3649                 *rconf1 |= 0x0;
3650                 break;
3651         default:
3652                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3653                 break;
3654         }
3655 }
3656
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster config
 * for parts with harvested (disabled) render backends
 * @adev: amdgpu device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value for the fully-enabled part
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: nominal number of RB slots (before harvesting)
 *
 * For each shader engine the base config is patched so that SE-pair, SE,
 * packer and RB mapping fields only reference RBs present in @rb_mask,
 * then written with a per-SE GRBM_GFX_INDEX selection.  Broadcast
 * selection is restored before returning.  The visible caller
 * (gfx_v8_0_setup_rb()) holds adev->grbm_idx_mutex around this call.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into per-SE sub-masks: SE n owns bits
	 * [n * rb_per_se, (n + 1) * rb_per_se). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* The patch logic below only supports these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one whole SE pair is empty, point SE_PAIR_MAP at the other. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the first SE of the pair containing this SE. */
		int idx = (se / 2) * 2;

		/* One SE of the pair empty: redirect SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* One packer of this SE empty: redirect PKR_MAP. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Within each packer, map around individual harvested RBs. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* Second packer exists only when rb_per_se > 2. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3765
/*
 * gfx_v8_0_setup_rb - discover active render backends and program raster config
 * @adev: amdgpu device pointer
 *
 * Walks every SE/SH to build the active-RB bitmap, stores it (and the RB
 * count) in adev->gfx.config, then writes PA_SC_RASTER_CONFIG[_1] either
 * broadcast (no harvesting needed) or per-SE via the harvested path.
 * Finally caches the per-SE/SH register values for userspace queries.
 * All GRBM_GFX_INDEX manipulation is serialized by grbm_idx_mutex.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Number of RB bits contributed by each SH. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect each SH's active-RB bits into one global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* All RBs present (or none): broadcast the default config;
	 * otherwise patch the config per SE around harvested RBs. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3822
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Programs SH_MEM_CONFIG/SH_MEM_BASES (and disables the APE1 aperture) for
 * the VMIDs reserved for compute, selecting each VMID via SRBM in turn.
 *
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit disables that aperture. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore SRBM selection to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3867
3868 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3869 {
3870         switch (adev->asic_type) {
3871         default:
3872                 adev->gfx.config.double_offchip_lds_buf = 1;
3873                 break;
3874         case CHIP_CARRIZO:
3875         case CHIP_STONEY:
3876                 adev->gfx.config.double_offchip_lds_buf = 0;
3877                 break;
3878         }
3879 }
3880
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 * @adev: amdgpu device pointer
 *
 * Programs the global address config, tiling tables, RB setup and CU info,
 * then initializes per-VMID SH_MEM registers, compute VMIDs, and finally
 * broadcast-programs the PA_SC FIFO sizes and SPI arbitration priorities.
 * The register sequence below follows the hardware programming order; do
 * not reorder without consulting the VI programming guides.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* The same gb_addr_config value is mirrored into HDP and DMIF. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/GART): uncached default mtype,
			 * zero SH_MEM base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: non-coherent default mtype, base taken
			 * from the shared aperture start (top 16 bits). */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit disables the APE1 aperture. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Give all four pipe order timestamps equal arbitration priority. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3961
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 * @adev: amdgpu device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH (up to adev->usec_timeout
 * microseconds each), then polls the non-CU masters (SE/GC/TC0/TC1)
 * globally.  Timeouts are silent: the function simply returns after the
 * poll budget is exhausted.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* Poll each SE/SH's CU master busy flag under grbm_idx_mutex. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Then wait for the non-CU serdes masters to clear. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3991
3992 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3993                                                bool enable)
3994 {
3995         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3996
3997         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3998         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3999         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
4000         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
4001
4002         WREG32(mmCP_INT_CNTL_RING0, tmp);
4003 }
4004
4005 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
4006 {
4007         /* csib */
4008         WREG32(mmRLC_CSIB_ADDR_HI,
4009                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
4010         WREG32(mmRLC_CSIB_ADDR_LO,
4011                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4012         WREG32(mmRLC_CSIB_LENGTH,
4013                         adev->gfx.rlc.clear_state_size);
4014 }
4015
/*
 * gfx_v8_0_parse_ind_reg_list - pre-process the RLC indirect register list
 *
 * @register_list_format: raw list from the RLC firmware; rewritten IN PLACE so
 *      that each index-register value is replaced by its position in
 *      @unique_indices
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total list length in dwords
 * @unique_indices: out - table of distinct index-register values encountered
 * @indices_count: in/out - number of valid entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out - start offset of each 0xFFFFFFFF-terminated entry
 * @offset_count: in/out - number of valid entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of variable-length entries, each terminated by the
 * sentinel 0xFFFFFFFF.  Within an entry, items appear to be 3-dword groups
 * whose third dword is an index-register value (inferred from the
 * "ind_offset += 2" skip below -- TODO confirm against the RLC fw format).
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                /* record where each sentinel-delimited entry begins */
                if (new_entry) {
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        /* NOTE(review): checked after the store/increment, so
                         * this fires once the table is exactly full even
                         * though no out-of-bounds write happened yet */
                        BUG_ON(*offset_count >= max_offset);
                }

                /* 0xFFFFFFFF terminates the current entry */
                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        new_entry = true;
                        continue;
                }

                /* skip to the index-register value of this 3-dword group */
                ind_offset += 2;

                /* look for the matching index */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                /* first occurrence of this value: append it to the table */
                if (indices >= *indices_count) {
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                /* compress: replace the raw value with its table index */
                register_list_format[ind_offset] = indices;
        }
}
4065
/*
 * gfx_v8_0_init_save_restore_list - program the RLC save/restore machinery
 *
 * Uploads the register save/restore list into RLC_SRM ARAM, writes the
 * (pre-processed) indirect register list and the entry start offsets into
 * RLC GPM scratch memory, and programs the unique index registers collected
 * by gfx_v8_0_parse_ind_reg_list().
 *
 * Returns 0 on success or -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
        int i, temp, data;
        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
        int indices_count = 0;
        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        int offset_count = 0;

        int list_size;
        /* work on a copy: parse_ind_reg_list rewrites the list in place */
        unsigned int *register_list_format =
                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
        if (!register_list_format)
                return -ENOMEM;
        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
                        adev->gfx.rlc.reg_list_format_size_bytes);

        gfx_v8_0_parse_ind_reg_list(register_list_format,
                                RLC_FormatDirectRegListLength,
                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
                                unique_indices,
                                &indices_count,
                                sizeof(unique_indices) / sizeof(int),
                                indirect_start_offsets,
                                &offset_count,
                                sizeof(indirect_start_offsets)/sizeof(int));

        /* save and restore list */
        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

        /* ARAM auto-increments after each data write (enabled above) */
        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

        /* indirect list */
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

        /* list size is given to the RLC in units of dword pairs */
        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
        list_size = list_size >> 1;
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

        /* starting offsets starts */
        WREG32(mmRLC_GPM_SCRATCH_ADDR,
                adev->gfx.rlc.starting_offsets_start);
        for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA,
                                indirect_start_offsets[i]);

        /* unique indices */
        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
        for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
                if (unique_indices[i] != 0) {
                        /* low 18 bits -> ADDR reg, bits above 20 -> DATA reg.
                         * NOTE(review): bits 18-19 are dropped by both masks;
                         * presumably matches the packed fw encoding - verify
                         * against the RLC SRM spec. */
                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
                        WREG32(data + i, unique_indices[i] >> 20);
                }
        }
        kfree(register_list_format);

        return 0;
}
4129
/* Turn on the RLC save/restore machine (SRM); must be programmed first via
 * gfx_v8_0_init_save_restore_list(). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4134
4135 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4136 {
4137         uint32_t data;
4138
4139         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4140
4141         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4142         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4143         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4144         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4145         WREG32(mmRLC_PG_DELAY, data);
4146
4147         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4148         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4149
4150 }
4151
4152 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4153                                                 bool enable)
4154 {
4155         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4156 }
4157
4158 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4159                                                   bool enable)
4160 {
4161         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4162 }
4163
4164 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4165 {
4166         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4167 }
4168
4169 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4170 {
4171         if ((adev->asic_type == CHIP_CARRIZO) ||
4172             (adev->asic_type == CHIP_STONEY)) {
4173                 gfx_v8_0_init_csb(adev);
4174                 gfx_v8_0_init_save_restore_list(adev);
4175                 gfx_v8_0_enable_save_restore_machine(adev);
4176                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4177                 gfx_v8_0_init_power_gating(adev);
4178                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4179         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4180                    (adev->asic_type == CHIP_POLARIS12)) {
4181                 gfx_v8_0_init_csb(adev);
4182                 gfx_v8_0_init_save_restore_list(adev);
4183                 gfx_v8_0_enable_save_restore_machine(adev);
4184                 gfx_v8_0_init_power_gating(adev);
4185         }
4186
4187 }
4188
/* Halt the RLC F32 core, mask the GUI idle interrupt and wait until the
 * RLC serdes links report idle. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}
4196
/* Pulse the RLC soft-reset bit in GRBM, with a 50us settle delay on each
 * edge of the pulse. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}
4205
4206 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4207 {
4208         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4209
4210         /* carrizo do enable cp interrupt after cp inited */
4211         if (!(adev->flags & AMD_IS_APU))
4212                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4213
4214         udelay(50);
4215 }
4216
4217 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4218 {
4219         const struct rlc_firmware_header_v2_0 *hdr;
4220         const __le32 *fw_data;
4221         unsigned i, fw_size;
4222
4223         if (!adev->gfx.rlc_fw)
4224                 return -EINVAL;
4225
4226         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4227         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4228
4229         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4230                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4231         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4232
4233         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4234         for (i = 0; i < fw_size; i++)
4235                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4236         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4237
4238         return 0;
4239 }
4240
/*
 * gfx_v8_0_rlc_resume - full RLC restart sequence
 *
 * Stops the RLC, disables coarse/fine clock gating, disables power gating,
 * soft-resets the RLC, re-initializes power gating, (re)loads the RLC
 * microcode if the driver (not the SMU/powerplay) owns firmware loading,
 * and finally starts the RLC again.
 *
 * Returns 0 on success or a negative error code from the firmware path.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        int r;
        u32 tmp;

        gfx_v8_0_rlc_stop(adev);

        /* disable CG */
        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
        tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
            adev->asic_type == CHIP_POLARIS10 ||
            adev->asic_type == CHIP_POLARIS12) {
                /* Polaris also gates the 3D pipe; clear its two enable bits */
                tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
                tmp &= ~0x3;
                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
        }

        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);

        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);

        if (!adev->pp_enabled) {
                if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
                        /* legacy rlc firmware loading */
                        r = gfx_v8_0_rlc_load_microcode(adev);
                        if (r)
                                return r;
                } else {
                        /* SMU loads the fw; just wait for it to finish */
                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_RLC_G);
                        if (r)
                                return -EINVAL;
                }
        }

        gfx_v8_0_rlc_start(adev);

        return 0;
}
4285
4286 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4287 {
4288         int i;
4289         u32 tmp = RREG32(mmCP_ME_CNTL);
4290
4291         if (enable) {
4292                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4293                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4294                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4295         } else {
4296                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4297                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4298                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4299                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4300                         adev->gfx.gfx_ring[i].ready = false;
4301         }
4302         WREG32(mmCP_ME_CNTL, tmp);
4303         udelay(50);
4304 }
4305
4306 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4307 {
4308         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4309         const struct gfx_firmware_header_v1_0 *ce_hdr;
4310         const struct gfx_firmware_header_v1_0 *me_hdr;
4311         const __le32 *fw_data;
4312         unsigned i, fw_size;
4313
4314         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4315                 return -EINVAL;
4316
4317         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4318                 adev->gfx.pfp_fw->data;
4319         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4320                 adev->gfx.ce_fw->data;
4321         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4322                 adev->gfx.me_fw->data;
4323
4324         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4325         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4326         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4327
4328         gfx_v8_0_cp_gfx_enable(adev, false);
4329
4330         /* PFP */
4331         fw_data = (const __le32 *)
4332                 (adev->gfx.pfp_fw->data +
4333                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4334         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4335         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4336         for (i = 0; i < fw_size; i++)
4337                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4338         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4339
4340         /* CE */
4341         fw_data = (const __le32 *)
4342                 (adev->gfx.ce_fw->data +
4343                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4344         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4345         WREG32(mmCP_CE_UCODE_ADDR, 0);
4346         for (i = 0; i < fw_size; i++)
4347                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4348         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4349
4350         /* ME */
4351         fw_data = (const __le32 *)
4352                 (adev->gfx.me_fw->data +
4353                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4354         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4355         WREG32(mmCP_ME_RAM_WADDR, 0);
4356         for (i = 0; i < fw_size; i++)
4357                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4358         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4359
4360         return 0;
4361 }
4362
4363 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4364 {
4365         u32 count = 0;
4366         const struct cs_section_def *sect = NULL;
4367         const struct cs_extent_def *ext = NULL;
4368
4369         /* begin clear state */
4370         count += 2;
4371         /* context control state */
4372         count += 3;
4373
4374         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4375                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4376                         if (sect->id == SECT_CONTEXT)
4377                                 count += 2 + ext->reg_count;
4378                         else
4379                                 return 0;
4380                 }
4381         }
4382         /* pa_sc_raster_config/pa_sc_raster_config1 */
4383         count += 4;
4384         /* end clear state */
4385         count += 2;
4386         /* clear state */
4387         count += 2;
4388
4389         return count;
4390 }
4391
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the clear state
 *
 * Programs the basic CP config registers, un-halts the gfx pipe, then
 * submits one PM4 command stream on gfx ring 0 that loads the golden
 * context (clear state), sets the per-ASIC raster config, and initializes
 * the CE partitions.  The exact packet order follows the hw programming
 * guide and must not be rearranged.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        /* csb size + 4 extra dwords for the SET_BASE packet emitted below */
        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* replay the golden register state from the static vi_cs_data tables */
        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* per-ASIC raster configuration (PA_SC_RASTER_CONFIG/_1) */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_POLARIS10:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x0000002A);
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_ring_write(ring, 0x16000012);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_FIJI:
                amdgpu_ring_write(ring, 0x3a00161a);
                amdgpu_ring_write(ring, 0x0000002e);
                break;
        case CHIP_CARRIZO:
                amdgpu_ring_write(ring, 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_TOPAZ:
                /* Topaz: config depends on the number of render backends */
                amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
                                0x00000000 : 0x00000002);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        case CHIP_STONEY:
                amdgpu_ring_write(ring, 0x00000000);
                amdgpu_ring_write(ring, 0x00000000);
                break;
        default:
                /* every gfx8 ASIC must be listed above */
                BUG();
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
4484
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * Programs the ring buffer registers (size, pointers, writeback addresses,
 * base), configures the gfx doorbell where supported, emits the clear
 * state via gfx_v8_0_cp_gfx_start(), and ring-tests the result.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address whether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        /* let the pointer writes land before dropping RB_RPTR_WR_ENA */
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        /* ring base is programmed in units of 256 bytes */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        /* no gfx doorbells on iceland */
        if (adev->asic_type != CHIP_TOPAZ) {
                tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
                if (ring->use_doorbell) {
                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_OFFSET, ring->doorbell_index);
                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_HIT, 0);
                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 1);
                } else {
                        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                            DOORBELL_EN, 0);
                }
                WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

                /* Tonga additionally needs the valid doorbell range set */
                if (adev->asic_type == CHIP_TONGA) {
                        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                                            DOORBELL_RANGE_LOWER,
                                            AMDGPU_DOORBELL_GFX_RING0);
                        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

                        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
                               CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
                }

        }

        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r)
                ring->ready = false;

        return r;
}
4569
4570 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4571 {
4572         int i;
4573
4574         if (enable) {
4575                 WREG32(mmCP_MEC_CNTL, 0);
4576         } else {
4577                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4578                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4579                         adev->gfx.compute_ring[i].ready = false;
4580                 adev->gfx.kiq.ring.ready = false;
4581         }
4582         udelay(50);
4583 }
4584
4585 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4586 {
4587         const struct gfx_firmware_header_v1_0 *mec_hdr;
4588         const __le32 *fw_data;
4589         unsigned i, fw_size;
4590
4591         if (!adev->gfx.mec_fw)
4592                 return -EINVAL;
4593
4594         gfx_v8_0_cp_compute_enable(adev, false);
4595
4596         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4597         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4598
4599         fw_data = (const __le32 *)
4600                 (adev->gfx.mec_fw->data +
4601                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4602         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4603
4604         /* MEC1 */
4605         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4606         for (i = 0; i < fw_size; i++)
4607                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4608         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4609
4610         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4611         if (adev->gfx.mec2_fw) {
4612                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4613
4614                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4615                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4616
4617                 fw_data = (const __le32 *)
4618                         (adev->gfx.mec2_fw->data +
4619                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4620                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4621
4622                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4623                 for (i = 0; i < fw_size; i++)
4624                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4625                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4626         }
4627
4628         return 0;
4629 }
4630
4631 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4632 {
4633         int i, r;
4634
4635         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4636                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4637
4638                 if (ring->mqd_obj) {
4639                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4640                         if (unlikely(r != 0))
4641                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4642
4643                         amdgpu_bo_unpin(ring->mqd_obj);
4644                         amdgpu_bo_unreserve(ring->mqd_obj);
4645
4646                         amdgpu_bo_unref(&ring->mqd_obj);
4647                         ring->mqd_obj = NULL;
4648                         ring->mqd_ptr = NULL;
4649                         ring->mqd_gpu_addr = 0;
4650                 }
4651         }
4652 }
4653
4654 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ.  The queue id is written
 * first, then written again with bit 7 set -- presumably an activate bit;
 * the two-step write order is deliberate (TODO confirm against RLC spec). */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
        uint32_t tmp;
        struct amdgpu_device *adev = ring->adev;

        /* tell RLC which is KIQ queue */
        tmp = RREG32(mmRLC_CP_SCHEDULERS);
        tmp &= 0xffffff00;
        tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
        tmp |= 0x80;
        WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4668
4669 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4670 {
4671         amdgpu_ring_alloc(ring, 8);
4672         /* set resources */
4673         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4674         amdgpu_ring_write(ring, 0);     /* vmid_mask:0 queue_type:0 (KIQ) */
4675         amdgpu_ring_write(ring, 0x000000FF);    /* queue mask lo */
4676         amdgpu_ring_write(ring, 0);     /* queue mask hi */
4677         amdgpu_ring_write(ring, 0);     /* gws mask lo */
4678         amdgpu_ring_write(ring, 0);     /* gws mask hi */
4679         amdgpu_ring_write(ring, 0);     /* oac mask */
4680         amdgpu_ring_write(ring, 0);     /* gds heap base:0, gds heap size:0 */
4681         amdgpu_ring_commit(ring);
4682         udelay(50);
4683 }
4684
4685 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4686                                    struct amdgpu_ring *ring)
4687 {
4688         struct amdgpu_device *adev = kiq_ring->adev;
4689         uint64_t mqd_addr, wptr_addr;
4690
4691         mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4692         wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4693         amdgpu_ring_alloc(kiq_ring, 8);
4694
4695         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4696         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4697         amdgpu_ring_write(kiq_ring, 0x21010000);
4698         amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4699                         (ring->queue << 26) |
4700                         (ring->pipe << 29) |
4701                         ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4702         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4703         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4704         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4705         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4706         amdgpu_ring_commit(kiq_ring);
4707         udelay(50);
4708 }
4709
4710 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4711 {
4712         struct amdgpu_device *adev = ring->adev;
4713         struct vi_mqd *mqd = ring->mqd_ptr;
4714         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4715         uint32_t tmp;
4716
4717         mqd->header = 0xC0310800;
4718         mqd->compute_pipelinestat_enable = 0x00000001;
4719         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4720         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4721         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4722         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4723         mqd->compute_misc_reserved = 0x00000003;
4724
4725         eop_base_addr = ring->eop_gpu_addr >> 8;
4726         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4727         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4728
4729         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4730         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4731         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4732                         (order_base_2(MEC_HPD_SIZE / 4) - 1));
4733
4734         mqd->cp_hqd_eop_control = tmp;
4735
4736         /* enable doorbell? */
4737         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4738                             CP_HQD_PQ_DOORBELL_CONTROL,
4739                             DOORBELL_EN,
4740                             ring->use_doorbell ? 1 : 0);
4741
4742         mqd->cp_hqd_pq_doorbell_control = tmp;
4743
4744         /* disable the queue if it's active */
4745         mqd->cp_hqd_dequeue_request = 0;
4746         mqd->cp_hqd_pq_rptr = 0;
4747         mqd->cp_hqd_pq_wptr = 0;
4748
4749         /* set the pointer to the MQD */
4750         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4751         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4752
4753         /* set MQD vmid to 0 */
4754         tmp = RREG32(mmCP_MQD_CONTROL);
4755         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4756         mqd->cp_mqd_control = tmp;
4757
4758         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4759         hqd_gpu_addr = ring->gpu_addr >> 8;
4760         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4761         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4762
4763         /* set up the HQD, this is similar to CP_RB0_CNTL */
4764         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4765         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4766                             (order_base_2(ring->ring_size / 4) - 1));
4767         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4768                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4769 #ifdef __BIG_ENDIAN
4770         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4771 #endif
4772         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4773         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4774         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4775         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4776         mqd->cp_hqd_pq_control = tmp;
4777
4778         /* set the wb address whether it's enabled or not */
4779         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4780         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4781         mqd->cp_hqd_pq_rptr_report_addr_hi =
4782                 upper_32_bits(wb_gpu_addr) & 0xffff;
4783
4784         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4785         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4786         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4787         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4788
4789         tmp = 0;
4790         /* enable the doorbell if requested */
4791         if (ring->use_doorbell) {
4792                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4793                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4794                                 DOORBELL_OFFSET, ring->doorbell_index);
4795
4796                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4797                                          DOORBELL_EN, 1);
4798                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4799                                          DOORBELL_SOURCE, 0);
4800                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4801                                          DOORBELL_HIT, 0);
4802         }
4803
4804         mqd->cp_hqd_pq_doorbell_control = tmp;
4805
4806         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4807         ring->wptr = 0;
4808         mqd->cp_hqd_pq_wptr = ring->wptr;
4809         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4810
4811         /* set the vmid for the queue */
4812         mqd->cp_hqd_vmid = 0;
4813
4814         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4815         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4816         mqd->cp_hqd_persistent_state = tmp;
4817
4818         /* activate the queue */
4819         mqd->cp_hqd_active = 1;
4820
4821         return 0;
4822 }
4823
/*
 * Program the hardware queue descriptor (HQD) registers for the KIQ
 * ring from its shadow MQD (ring->mqd_ptr).
 *
 * The caller must hold adev->srbm_mutex and must already have routed
 * register access to this ring's me/pipe/queue via vi_srbm_select()
 * (see gfx_v8_0_kiq_init_queue()).  Register order matters: an active
 * queue is dequeued before its descriptor is rewritten, and
 * CP_HQD_ACTIVE is written last to hand the queue to the CP.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int j;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
        WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

        /* enable doorbell? */
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

        /* disable the queue if it's active */
        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
                /* busy-wait up to adev->usec_timeout us for the dequeue */
                for (j = 0; j < adev->usec_timeout; j++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
                WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
        }

        /* set the pointer to the MQD */
        WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
        WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

        /* set MQD vmid to 0 */
        WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
        WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

        /* set the wb address whether it's enabled or not */
        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
                                mqd->cp_hqd_pq_rptr_report_addr_lo);
        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                                mqd->cp_hqd_pq_rptr_report_addr_hi);

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
        WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                /* NOTE(review): gfx_v8_0_cp_compute_resume() also includes
                 * POLARIS10/11/12 when it sets the MEC doorbell range;
                 * confirm whether their omission here is intentional. */
                if ((adev->asic_type == CHIP_CARRIZO) ||
                                (adev->asic_type == CHIP_FIJI) ||
                                (adev->asic_type == CHIP_STONEY)) {
                        WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                                AMDGPU_DOORBELL_KIQ << 2);
                        WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
                                                AMDGPU_DOORBELL_MEC_RING7 << 2);
                }
        }
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

        /* set the vmid for the queue */
        WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

        WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

        /* activate the queue */
        WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

        if (ring->use_doorbell)
                WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);

        return 0;
}
4908
/*
 * Initialize the MQD for a KIQ or compute ring and hand the queue to
 * the hardware.
 *
 * Normal bring-up: the MQD is built from scratch under srbm_mutex with
 * this ring's me/pipe/queue selected, and a backup copy is stashed in
 * adev->gfx.mec.mqd_backup[].  GPU-reset path: the saved backup is
 * restored instead and the ring buffer is cleared.  The KIQ ring gets
 * its HQD registers programmed directly; regular compute queues are
 * mapped through the KIQ via gfx_v8_0_map_queue_enable().
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct vi_mqd *mqd = ring->mqd_ptr;
        bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
        /* KIQ uses the backup slot just past the compute rings */
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        if (is_kiq) {
                gfx_v8_0_kiq_setting(&kiq->ring);
        } else {
                mqd_idx = ring - &adev->gfx.compute_ring[0];
        }

        if (!adev->gfx.in_reset) {
                memset((void *)mqd, 0, sizeof(*mqd));
                /* HQD registers are per-queue: select this ring's
                 * me/pipe/queue while holding srbm_mutex */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                if (is_kiq)
                        gfx_v8_0_kiq_init_register(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a copy so a GPU reset can restore a clean MQD */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
        } else { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);

                if (is_kiq) {
                    mutex_lock(&adev->srbm_mutex);
                    vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                    gfx_v8_0_kiq_init_register(ring);
                    vi_srbm_select(adev, 0, 0, 0, 0);
                    mutex_unlock(&adev->srbm_mutex);
                }
        }

        if (is_kiq)
                gfx_v8_0_kiq_enable(ring);
        else
                gfx_v8_0_map_queue_enable(&kiq->ring, ring);

        return 0;
}
4960
4961 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4962 {
4963         struct amdgpu_ring *ring = NULL;
4964         int r = 0, i;
4965
4966         gfx_v8_0_cp_compute_enable(adev, true);
4967
4968         ring = &adev->gfx.kiq.ring;
4969
4970         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4971         if (unlikely(r != 0))
4972                 goto done;
4973
4974         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4975         if (!r) {
4976                 r = gfx_v8_0_kiq_init_queue(ring);
4977                 amdgpu_bo_kunmap(ring->mqd_obj);
4978                 ring->mqd_ptr = NULL;
4979         }
4980         amdgpu_bo_unreserve(ring->mqd_obj);
4981         if (r)
4982                 goto done;
4983
4984         ring->ready = true;
4985         r = amdgpu_ring_test_ring(ring);
4986         if (r) {
4987                 ring->ready = false;
4988                 goto done;
4989         }
4990
4991         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4992                 ring = &adev->gfx.compute_ring[i];
4993
4994                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4995                 if (unlikely(r != 0))
4996                         goto done;
4997                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4998                 if (!r) {
4999                         r = gfx_v8_0_kiq_init_queue(ring);
5000                         amdgpu_bo_kunmap(ring->mqd_obj);
5001                         ring->mqd_ptr = NULL;
5002                 }
5003                 amdgpu_bo_unreserve(ring->mqd_obj);
5004                 if (r)
5005                         goto done;
5006
5007                 ring->ready = true;
5008                 r = amdgpu_ring_test_ring(ring);
5009                 if (r)
5010                         ring->ready = false;
5011         }
5012
5013 done:
5014         return r;
5015 }
5016
/*
 * Bare-metal compute bring-up: for each compute ring, lazily create,
 * pin and map its MQD buffer object, program the HQD registers directly
 * through SRBM (mirroring every value into the MQD shadow), then enable
 * CP compute and ring-test every queue.
 *
 * The SRIOV path uses gfx_v8_0_kiq_resume() instead (see
 * gfx_v8_0_cp_resume()).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
        int r, i, j;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct vi_mqd *mqd;

        /* init the queues.  */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                /* lazily create the MQD BO on first resume */
                if (ring->mqd_obj == NULL) {
                        r = amdgpu_bo_create(adev,
                                             sizeof(struct vi_mqd),
                                             PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &ring->mqd_obj);
                        if (r) {
                                dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0)) {
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }
                r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }
                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct vi_mqd));

                mqd = (struct vi_mqd *)buf;
                mqd->header = 0xC0310800;
                mqd->compute_pipelinestat_enable = 0x00000001;
                mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
                mqd->compute_misc_reserved = 0x00000003;

                /* HQD registers are per-queue: route register access to
                 * this ring's me/pipe/queue while holding srbm_mutex */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me,
                               ring->pipe,
                               ring->queue, 0);

                eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
                eop_gpu_addr >>= 8;

                /* write the EOP addr */
                WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
                WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

                /* set the VMID assigned */
                WREG32(mmCP_HQD_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(mmCP_HQD_EOP_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                                    (order_base_2(MEC_HPD_SIZE / 4) - 1));
                WREG32(mmCP_HQD_EOP_CONTROL, tmp);

                /* disable wptr polling */
                tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
                tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
                WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

                /* read back the programmed EOP address into the shadow */
                mqd->cp_hqd_eop_base_addr_lo =
                        RREG32(mmCP_HQD_EOP_BASE_ADDR);
                mqd->cp_hqd_eop_base_addr_hi =
                        RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

                /* enable doorbell? */
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell) {
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
                } else {
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
                }
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
                mqd->cp_hqd_pq_doorbell_control = tmp;

                /* disable the queue if it's active */
                mqd->cp_hqd_dequeue_request = 0;
                mqd->cp_hqd_pq_rptr = 0;
                mqd->cp_hqd_pq_wptr= 0;
                if (RREG32(mmCP_HQD_ACTIVE) & 1) {
                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
                        /* busy-wait up to adev->usec_timeout us for dequeue */
                        for (j = 0; j < adev->usec_timeout; j++) {
                                if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
                        WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
                        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
                mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
                WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

                /* set MQD vmid to 0 */
                tmp = RREG32(mmCP_MQD_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
                WREG32(mmCP_MQD_CONTROL, tmp);
                mqd->cp_mqd_control = tmp;

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = ring->gpu_addr >> 8;
                mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
                mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
                WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                tmp = RREG32(mmCP_HQD_PQ_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                                    (order_base_2(ring->ring_size / 4) - 1));
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                               ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
                WREG32(mmCP_HQD_PQ_CONTROL, tmp);
                mqd->cp_hqd_pq_control = tmp;

                /* set the wb address whether it's enabled or not */
                wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
                mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
                mqd->cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->cp_hqd_pq_rptr_report_addr_lo);
                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->cp_hqd_pq_rptr_report_addr_hi);

                /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
                wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
                mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
                mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->cp_hqd_pq_wptr_poll_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        if ((adev->asic_type == CHIP_CARRIZO) ||
                            (adev->asic_type == CHIP_FIJI) ||
                            (adev->asic_type == CHIP_STONEY) ||
                            (adev->asic_type == CHIP_POLARIS11) ||
                            (adev->asic_type == CHIP_POLARIS10) ||
                            (adev->asic_type == CHIP_POLARIS12)) {
                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                       AMDGPU_DOORBELL_KIQ << 2);
                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
                                       AMDGPU_DOORBELL_MEC_RING7 << 2);
                        }
                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                            DOORBELL_OFFSET, ring->doorbell_index);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
                        mqd->cp_hqd_pq_doorbell_control = tmp;

                } else {
                        mqd->cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->cp_hqd_pq_doorbell_control);

                /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                ring->wptr = 0;
                mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
                mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->cp_hqd_vmid = 0;
                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

                tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
                WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
                mqd->cp_hqd_persistent_state = tmp;
                if (adev->asic_type == CHIP_STONEY ||
                        adev->asic_type == CHIP_POLARIS11 ||
                        adev->asic_type == CHIP_POLARIS10 ||
                        adev->asic_type == CHIP_POLARIS12) {
                        tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
                        WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
                }

                /* activate the queue */
                mqd->cp_hqd_active = 1;
                WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                amdgpu_bo_kunmap(ring->mqd_obj);
                amdgpu_bo_unreserve(ring->mqd_obj);
        }

        if (use_doorbell) {
                tmp = RREG32(mmCP_PQ_STATUS);
                tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
                WREG32(mmCP_PQ_STATUS, tmp);
        }

        gfx_v8_0_cp_compute_enable(adev, true);

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                /* NOTE(review): a failed ring test only clears ring->ready;
                 * the function still returns 0 — confirm this best-effort
                 * behavior is intended. */
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

        return 0;
}
5266
5267 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5268 {
5269         int r;
5270
5271         if (!(adev->flags & AMD_IS_APU))
5272                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5273
5274         if (!adev->pp_enabled) {
5275                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5276                         /* legacy firmware loading */
5277                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5278                         if (r)
5279                                 return r;
5280
5281                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5282                         if (r)
5283                                 return r;
5284                 } else {
5285                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5286                                                         AMDGPU_UCODE_ID_CP_CE);
5287                         if (r)
5288                                 return -EINVAL;
5289
5290                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5291                                                         AMDGPU_UCODE_ID_CP_PFP);
5292                         if (r)
5293                                 return -EINVAL;
5294
5295                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5296                                                         AMDGPU_UCODE_ID_CP_ME);
5297                         if (r)
5298                                 return -EINVAL;
5299
5300                         if (adev->asic_type == CHIP_TOPAZ) {
5301                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5302                                 if (r)
5303                                         return r;
5304                         } else {
5305                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5306                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5307                                 if (r)
5308                                         return -EINVAL;
5309                         }
5310                 }
5311         }
5312
5313         r = gfx_v8_0_cp_gfx_resume(adev);
5314         if (r)
5315                 return r;
5316
5317         if (amdgpu_sriov_vf(adev))
5318                 r = gfx_v8_0_kiq_resume(adev);
5319         else
5320                 r = gfx_v8_0_cp_compute_resume(adev);
5321         if (r)
5322                 return r;
5323
5324         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5325
5326         return 0;
5327 }
5328
/* Enable or disable both CP micro engines (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
5334
/*
 * gfx IP hw_init callback: apply golden register settings, set up the
 * GPU, then bring up the RLC followed by the CP rings.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5351
/*
 * gfx IP hw_fini callback: release the gfx interrupt sources, then halt
 * the CP and RLC and free the compute MQD objects, finally ungating
 * power gating.  Teardown order matters (CP before RLC).
 *
 * Under SRIOV only the IRQ references are dropped; the rest of the
 * teardown is skipped.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);

        /* NOTE(review): presumably ungated here so the block is in a known
         * power state for the next hw_init — confirm against PG docs */
        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
5371
/* Suspend is simply a full gfx hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
5378
/* Resume is simply a full gfx hardware re-init. */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
5385
5386 static bool gfx_v8_0_is_idle(void *handle)
5387 {
5388         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5389
5390         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5391                 return false;
5392         else
5393                 return true;
5394 }
5395
5396 static int gfx_v8_0_wait_for_idle(void *handle)
5397 {
5398         unsigned i;
5399         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5400
5401         for (i = 0; i < adev->usec_timeout; i++) {
5402                 if (gfx_v8_0_is_idle(handle))
5403                         return 0;
5404
5405                 udelay(1);
5406         }
5407         return -ETIMEDOUT;
5408 }
5409
/*
 * gfx IP check_soft_reset callback: decode the GRBM/SRBM status
 * registers and compute which soft-reset bits would be needed to
 * recover the block.
 *
 * The resulting masks are cached in adev->gfx.{grbm,srbm}_soft_reset
 * for the subsequent pre-/soft-reset steps.  Returns true when any
 * engine is busy and a reset is required, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(mmGRBM_STATUS);
        /* any busy gfx pipeline stage requires a CP + GFX (and GRBM) reset */
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2 */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP micro engine (fetcher/compute/gfx) */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* stash the masks for the pre-/soft-reset handlers */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5471
/*
 * gfx_v8_0_inactive_hqd - drain and deactivate the HQD backing a compute ring
 *
 * Selects the ring's me/pipe/queue through SRBM (under srbm_mutex) and, if
 * the hardware queue descriptor is still active, issues a dequeue request
 * and busy-waits (up to adev->usec_timeout iterations of 1us) for
 * CP_HQD_ACTIVE to clear.  SRBM selection is restored to 0/0/0/0 on exit.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
                                  struct amdgpu_ring *ring)
{
        int i;

        mutex_lock(&adev->srbm_mutex);
        /* point HQD register accesses at this ring's queue */
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                /* request the CP to dequeue this HQD (request type 2) */
                WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
        }
        /* restore default SRBM selection */
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
5490
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 *
 * Uses the reset masks cached in adev->gfx.{grbm,srbm}_soft_reset by the
 * check phase.  Stops the RLC; if the CP or GFX engine is being reset,
 * disables GFX command parsing/prefetching; if any compute CP unit (CPF/
 * CPC/CPG) is being reset, drains every compute HQD and disables the MEC.
 * Returns 0 (nothing to do when no reset bits are pending).
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* nothing pending from the check phase */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stop the rlc */
        gfx_v8_0_rlc_stop(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                /* Disable GFX parsing/prefetching */
                gfx_v8_0_cp_gfx_enable(adev, false);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* drain every compute queue before touching the MEC */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        gfx_v8_0_inactive_hqd(adev, ring);
                }
                /* Disable MEC parsing/prefetching */
                gfx_v8_0_cp_compute_enable(adev, false);
        }

        return 0;
}
5528
/*
 * gfx_v8_0_soft_reset - pulse the pending GRBM/SRBM soft-reset bits
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 *
 * Stalls GFX traffic in GMCON while the reset bits (cached by the check
 * phase) are asserted, waits 50us, deasserts them, then releases the
 * GMCON stall.  Each WREG32 of a reset register is followed by a read
 * back of the same register, presumably to post the write before the
 * delay — TODO confirm against the register programming guide.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* always true after the early return above; kept for symmetry
         * with the un-stall block at the end */
        if (grbm_soft_reset || srbm_soft_reset) {
                /* stall and clear GFX in the memory controller hub */
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                /* deassert only the bits we asserted */
                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        if (grbm_soft_reset || srbm_soft_reset) {
                /* release the GMCON stall/clear set up above */
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5590
/*
 * gfx_v8_0_init_hqd - clear a compute ring's HQD state after soft reset
 *
 * Selects the ring's me/pipe/queue through SRBM (under srbm_mutex) and
 * zeroes the dequeue request and PQ read/write pointers so the queue can
 * be re-initialized cleanly by the compute resume path.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
                              struct amdgpu_ring *ring)
{
        mutex_lock(&adev->srbm_mutex);
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
5602
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after soft reset
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): using the same cached reset masks,
 * resumes the GFX CP if it was reset, re-initializes every compute HQD
 * and resumes the MEC if any compute CP unit was reset, then restarts
 * the RLC.  Returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        gfx_v8_0_init_hqd(adev, ring);
                }
                gfx_v8_0_cp_compute_resume(adev);
        }
        /* restart the RLC stopped in the pre-reset phase */
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5636
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
        uint64_t clock;

        /* serialize capture + LSB/MSB reads so the two halves are coherent */
        mutex_lock(&adev->gfx.gpu_clock_mutex);
        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
        mutex_unlock(&adev->gfx.gpu_clock_mutex);
        return clock;
}
5656
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA allocation for a VMID
 *
 * Emits four PACKET3_WRITE_DATA packets that program the per-VMID GDS
 * base/size, GWS base/size and OA mask registers from the byte-granular
 * values passed in.  Inputs are converted to the hardware's allocation
 * granularity by the AMDGPU_*_SHIFT right-shifts first.  Each packet's
 * dword order (header, control, reg offset, addr-hi = 0, value) is fixed
 * by the PM4 WRITE_DATA layout — do not reorder.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* convert byte quantities to hardware allocation units */
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size and base packed into one register value */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: contiguous bitmask of oa_size bits starting at bit oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5704
/*
 * wave_read_ind - read one indirect SQ register of a wave
 *
 * Programs SQ_IND_INDEX with the wave/simd selection and register index
 * (FORCE_READ set) and returns the value latched in SQ_IND_DATA.
 * Caller is responsible for any serialization of SQ_IND_* access.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
        WREG32(mmSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
                (address << SQ_IND_INDEX__INDEX__SHIFT) |
                (SQ_IND_INDEX__FORCE_READ_MASK));
        return RREG32(mmSQ_IND_DATA);
}
5714
/*
 * wave_read_regs - bulk-read consecutive indirect SQ registers of a wave
 *
 * Like wave_read_ind() but additionally selects a thread and sets
 * AUTO_INCR, so each successive SQ_IND_DATA read returns the next
 * register starting at @regno.  @num values are stored into @out.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
                           uint32_t wave, uint32_t thread,
                           uint32_t regno, uint32_t num, uint32_t *out)
{
        WREG32(mmSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
                (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
                (SQ_IND_INDEX__FORCE_READ_MASK) |
                (SQ_IND_INDEX__AUTO_INCR_MASK));
        while (num--)
                *(out++) = RREG32(mmSQ_IND_DATA);
}
5729
5730 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5731 {
5732         /* type 0 wave data */
5733         dst[(*no_fields)++] = 0;
5734         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5735         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5736         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5737         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5738         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5739         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5740         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5741         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5742         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5743         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5744         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5745         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5746         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5747         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5748         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5749         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5750         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5751         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5752 }
5753
5754 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5755                                      uint32_t wave, uint32_t start,
5756                                      uint32_t size, uint32_t *dst)
5757 {
5758         wave_read_regs(
5759                 adev, simd, wave, 0,
5760                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5761 }
5762
5763
/* GFX IP helper callbacks exposed through adev->gfx.funcs (set in early_init) */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5770
/*
 * gfx_v8_0_early_init - early IP-block init: ring counts and vtables
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 *
 * Sets the number of GFX/compute rings and installs the gfx, ring, irq,
 * gds and rlc function tables.  No hardware is touched.  Returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5785
/*
 * gfx_v8_0_late_init - late IP-block init
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 *
 * Enables the privileged-register and privileged-instruction interrupts,
 * runs the EDC GPR workarounds (which submit IBs, hence "late": the IB
 * pool exists by now) and gates GFX power.  Returns 0 on success or the
 * first failing step's error code; earlier successful steps are not
 * rolled back on failure.
 */
static int gfx_v8_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;

        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
                return r;

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

        return 0;
}
5809
/*
 * gfx_v8_0_enable_gfx_static_mg_power_gating - toggle static per-CU PG
 *
 * On Polaris11/12 the SMU must be told first (via the powerplay SMC
 * powergating call); on all parts the RLC_PG_CNTL static per-CU enable
 * bit is then set or cleared to match @enable.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
                                                       bool enable)
{
        if ((adev->asic_type == CHIP_POLARIS11) ||
            (adev->asic_type == CHIP_POLARIS12))
                /* Send msg to SMU via Powerplay */
                amdgpu_set_powergating_state(adev,
                                             AMD_IP_BLOCK_TYPE_SMC,
                                             enable ?
                                             AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

        WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5823
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
                                                        bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5829
/* Toggle quick power gating via RLC_PG_CNTL (Polaris11/12 feature). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5835
/* Toggle coarse-grain GFX power gating via RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
                                          bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5841
/*
 * cz_enable_gfx_pipeline_power_gating - toggle GFX pipeline power gating
 *
 * When disabling, a dummy GFX register read is issued afterwards to make
 * sure the GFX block is awake.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

        /* Read any GFX register to wake up GFX. */
        if (!enable)
                RREG32(mmDB_RENDER_CONTROL);
}
5851
/*
 * cz_update_gfx_cg_power_gating - apply coarse-grain GFX PG policy (CZ/ST)
 *
 * On enable (and when GFX_PG is supported): turns on CG power gating and,
 * only if GFX_PIPELINE is also supported, pipeline power gating — an
 * unsupported pipeline PG bit is left untouched, not forced off.
 * On disable (or unsupported GFX_PG): both are forced off unconditionally.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
                                          bool enable)
{
        if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
                cz_enable_gfx_cg_power_gating(adev, true);
                if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
                        cz_enable_gfx_pipeline_power_gating(adev, true);
        } else {
                cz_enable_gfx_cg_power_gating(adev, false);
                cz_enable_gfx_pipeline_power_gating(adev, false);
        }
}
5864
5865 static int gfx_v8_0_set_powergating_state(void *handle,
5866                                           enum amd_powergating_state state)
5867 {
5868         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5869         bool enable = (state == AMD_PG_STATE_GATE);
5870
5871         if (amdgpu_sriov_vf(adev))
5872                 return 0;
5873
5874         switch (adev->asic_type) {
5875         case CHIP_CARRIZO:
5876         case CHIP_STONEY:
5877
5878                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5879                         cz_enable_sck_slow_down_on_power_up(adev, true);
5880                         cz_enable_sck_slow_down_on_power_down(adev, true);
5881                 } else {
5882                         cz_enable_sck_slow_down_on_power_up(adev, false);
5883                         cz_enable_sck_slow_down_on_power_down(adev, false);
5884                 }
5885                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5886                         cz_enable_cp_power_gating(adev, true);
5887                 else
5888                         cz_enable_cp_power_gating(adev, false);
5889
5890                 cz_update_gfx_cg_power_gating(adev, enable);
5891
5892                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5893                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5894                 else
5895                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5896
5897                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5898                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5899                 else
5900                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5901                 break;
5902         case CHIP_POLARIS11:
5903         case CHIP_POLARIS12:
5904                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5905                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5906                 else
5907                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5908
5909                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5910                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5911                 else
5912                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5913
5914                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5915                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5916                 else
5917                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5918                 break;
5919         default:
5920                 break;
5921         }
5922
5923         return 0;
5924 }
5925
/*
 * gfx_v8_0_get_clockgating_state - report active GFX clockgating features
 *
 * @handle: amdgpu_device pointer (as void *, per the IP-block interface)
 * @flags:  ORed with AMD_CG_SUPPORT_GFX_* bits for each feature whose
 *          enable/override bits in the corresponding register indicate
 *          it is currently active
 *
 * NOTE(review): under SR-IOV, *flags is zeroed but the function still
 * falls through and reads the registers below — confirm whether an early
 * return was intended here.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        if (amdgpu_sriov_vf(adev))
                *flags = 0;

        /* AMD_CG_SUPPORT_GFX_MGCG */
        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_MGCG;

        /* AMD_CG_SUPPORT_GFX_CGLG */
        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGCG;

        /* AMD_CG_SUPPORT_GFX_CGLS */
        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGLS;

        /* AMD_CG_SUPPORT_GFX_CGTS */
        data = RREG32(mmCGTS_SM_CTRL_REG);
        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS;

        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

        /* AMD_CG_SUPPORT_GFX_RLC_LS */
        data = RREG32(mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

        /* AMD_CG_SUPPORT_GFX_CP_LS */
        data = RREG32(mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5967
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes write to all CUs
 *
 * Selects all SE/SH instances, targets every CU and non-CU master, then
 * builds RLC_SERDES_WR_CTRL: the relevant command/select bits are cleared
 * (Stoney keeps its BPM_DATA/REG_ADDR bits, other ASICs clear those too)
 * and @cmd / @reg_addr plus a broadcast BPM address (0xff) are merged in
 * before the single triggering write.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        /* broadcast to every shader engine / shader array */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
6008
/*
 * RLC safe-mode request message encoding.  NOTE(review): the code visible
 * below uses the RLC_SAFE_MODE register fields directly; these RLC_GPR_REG2
 * definitions appear unused in this part of the file — confirm before
 * removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
6015
/*
 * iceland_enter_rlc_safe_mode - ask the RLC to enter safe mode
 *
 * No-op when the RLC F32 core is not running or when neither CGCG nor
 * MGCG is enabled in cg_flags.  Otherwise writes CMD=1 / MESSAGE=1 to
 * RLC_SAFE_MODE, waits for the GPM to report GFX clock and power status
 * bits set, then waits for the RLC to acknowledge by clearing the CMD
 * field.  Both waits are bounded by adev->usec_timeout and time out
 * silently.  Records entry in adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                /* CMD=1 with MESSAGE=1 requests safe-mode entry */
                data |= RLC_SAFE_MODE__CMD_MASK;
                data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
                WREG32(mmRLC_SAFE_MODE, data);

                for (i = 0; i < adev->usec_timeout; i++) {
                        if ((RREG32(mmRLC_GPM_STAT) &
                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
                                break;
                        udelay(1);
                }

                /* RLC clears CMD when the request has been consumed */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                                break;
                        udelay(1);
                }
                adev->gfx.rlc.in_safe_mode = true;
        }
}
6049
/*
 * iceland_exit_rlc_safe_mode - ask the RLC to leave safe mode
 *
 * Counterpart of iceland_enter_rlc_safe_mode(): writes CMD=1 with
 * MESSAGE=0 and clears in_safe_mode, then waits (bounded by
 * adev->usec_timeout) for the RLC to clear the CMD field.
 * NOTE(review): the final wait loop runs even when safe mode was never
 * entered (in_safe_mode false) — harmless-looking but confirm intent.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data = 0;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->gfx.rlc.in_safe_mode) {
                        /* CMD=1 with MESSAGE=0 requests safe-mode exit */
                        data |= RLC_SAFE_MODE__CMD_MASK;
                        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                        WREG32(mmRLC_SAFE_MODE, data);
                        adev->gfx.rlc.in_safe_mode = false;
                }
        }

        for (i = 0; i < adev->usec_timeout; i++) {
                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                        break;
                udelay(1);
        }
}
6074
/* RLC safe-mode hooks installed on adev->gfx.rlc.funcs for this family */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
        .enter_safe_mode = iceland_enter_rlc_safe_mode,
        .exit_safe_mode = iceland_exit_rlc_safe_mode
};
6079
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG
 *
 * Runs entirely inside RLC safe mode.  The numbered steps below are
 * order-sensitive: memory light-sleep first, then the MGCG override
 * register, serdes idle waits around each BPM serdes command, and the
 * CGTS (tree-shade) control last.  Registers are only written when the
 * computed value differs from the read value, to avoid spurious writes.
 * The disable path is the mirror image: set the overrides, turn off
 * RLC/CP light sleep and CGTS, then send the serdes "set override" cmd.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory Light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        /* dGPUs additionally clear the GRBM override */
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6183
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS for GFX
 *
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Programs RLC_CGCG_CGLS_CTRL and the RLC MGCG override register and
 * issues the BPM serdes commands in the hardware-required order.  The
 * RLC is held in safe mode for the entire sequence, and each register
 * is only written back when its value actually changed.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 - clear the CGCG override bit */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls */
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        /* also drop the CGLS override */
                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* set the CGCG and CGLS override bits */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to set CGCG override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        }

        gfx_v8_0_wait_for_rlc_serdes(adev);

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6274 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6275                                             bool enable)
6276 {
6277         if (enable) {
6278                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6279                  * ===  MGCG + MGLS + TS(CG/LS) ===
6280                  */
6281                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6282                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6283         } else {
6284                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6285                  * ===  CGCG + CGLS ===
6286                  */
6287                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6288                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6289         }
6290         return 0;
6291 }
6292
6293 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6294                                           enum amd_clockgating_state state)
6295 {
6296         uint32_t msg_id, pp_state = 0;
6297         uint32_t pp_support_state = 0;
6298         void *pp_handle = adev->powerplay.pp_handle;
6299
6300         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6301                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6302                         pp_support_state = PP_STATE_SUPPORT_LS;
6303                         pp_state = PP_STATE_LS;
6304                 }
6305                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6306                         pp_support_state |= PP_STATE_SUPPORT_CG;
6307                         pp_state |= PP_STATE_CG;
6308                 }
6309                 if (state == AMD_CG_STATE_UNGATE)
6310                         pp_state = 0;
6311
6312                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6313                                 PP_BLOCK_GFX_CG,
6314                                 pp_support_state,
6315                                 pp_state);
6316                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6317         }
6318
6319         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6320                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6321                         pp_support_state = PP_STATE_SUPPORT_LS;
6322                         pp_state = PP_STATE_LS;
6323                 }
6324
6325                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6326                         pp_support_state |= PP_STATE_SUPPORT_CG;
6327                         pp_state |= PP_STATE_CG;
6328                 }
6329
6330                 if (state == AMD_CG_STATE_UNGATE)
6331                         pp_state = 0;
6332
6333                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6334                                 PP_BLOCK_GFX_MG,
6335                                 pp_support_state,
6336                                 pp_state);
6337                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6338         }
6339
6340         return 0;
6341 }
6342
6343 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6344                                           enum amd_clockgating_state state)
6345 {
6346
6347         uint32_t msg_id, pp_state = 0;
6348         uint32_t pp_support_state = 0;
6349         void *pp_handle = adev->powerplay.pp_handle;
6350
6351         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6352                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6353                         pp_support_state = PP_STATE_SUPPORT_LS;
6354                         pp_state = PP_STATE_LS;
6355                 }
6356                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6357                         pp_support_state |= PP_STATE_SUPPORT_CG;
6358                         pp_state |= PP_STATE_CG;
6359                 }
6360                 if (state == AMD_CG_STATE_UNGATE)
6361                         pp_state = 0;
6362
6363                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6364                                 PP_BLOCK_GFX_CG,
6365                                 pp_support_state,
6366                                 pp_state);
6367                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6368         }
6369
6370         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6371                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6372                         pp_support_state = PP_STATE_SUPPORT_LS;
6373                         pp_state = PP_STATE_LS;
6374                 }
6375                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6376                         pp_support_state |= PP_STATE_SUPPORT_CG;
6377                         pp_state |= PP_STATE_CG;
6378                 }
6379                 if (state == AMD_CG_STATE_UNGATE)
6380                         pp_state = 0;
6381
6382                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6383                                 PP_BLOCK_GFX_3D,
6384                                 pp_support_state,
6385                                 pp_state);
6386                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6387         }
6388
6389         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6390                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6391                         pp_support_state = PP_STATE_SUPPORT_LS;
6392                         pp_state = PP_STATE_LS;
6393                 }
6394
6395                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6396                         pp_support_state |= PP_STATE_SUPPORT_CG;
6397                         pp_state |= PP_STATE_CG;
6398                 }
6399
6400                 if (state == AMD_CG_STATE_UNGATE)
6401                         pp_state = 0;
6402
6403                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6404                                 PP_BLOCK_GFX_MG,
6405                                 pp_support_state,
6406                                 pp_state);
6407                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6408         }
6409
6410         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6411                 pp_support_state = PP_STATE_SUPPORT_LS;
6412
6413                 if (state == AMD_CG_STATE_UNGATE)
6414                         pp_state = 0;
6415                 else
6416                         pp_state = PP_STATE_LS;
6417
6418                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6419                                 PP_BLOCK_GFX_RLC,
6420                                 pp_support_state,
6421                                 pp_state);
6422                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6423         }
6424
6425         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6426                 pp_support_state = PP_STATE_SUPPORT_LS;
6427
6428                 if (state == AMD_CG_STATE_UNGATE)
6429                         pp_state = 0;
6430                 else
6431                         pp_state = PP_STATE_LS;
6432                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6433                         PP_BLOCK_GFX_CP,
6434                         pp_support_state,
6435                         pp_state);
6436                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6437         }
6438
6439         return 0;
6440 }
6441
6442 static int gfx_v8_0_set_clockgating_state(void *handle,
6443                                           enum amd_clockgating_state state)
6444 {
6445         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6446
6447         if (amdgpu_sriov_vf(adev))
6448                 return 0;
6449
6450         switch (adev->asic_type) {
6451         case CHIP_FIJI:
6452         case CHIP_CARRIZO:
6453         case CHIP_STONEY:
6454                 gfx_v8_0_update_gfx_clock_gating(adev,
6455                                                  state == AMD_CG_STATE_GATE);
6456                 break;
6457         case CHIP_TONGA:
6458                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6459                 break;
6460         case CHIP_POLARIS10:
6461         case CHIP_POLARIS11:
6462         case CHIP_POLARIS12:
6463                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6464                 break;
6465         default:
6466                 break;
6467         }
6468         return 0;
6469 }
6470
6471 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6472 {
6473         return ring->adev->wb.wb[ring->rptr_offs];
6474 }
6475
6476 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6477 {
6478         struct amdgpu_device *adev = ring->adev;
6479
6480         if (ring->use_doorbell)
6481                 /* XXX check if swapping is necessary on BE */
6482                 return ring->adev->wb.wb[ring->wptr_offs];
6483         else
6484                 return RREG32(mmCP_RB0_WPTR);
6485 }
6486
6487 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6488 {
6489         struct amdgpu_device *adev = ring->adev;
6490
6491         if (ring->use_doorbell) {
6492                 /* XXX check if swapping is necessary on BE */
6493                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6494                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6495         } else {
6496                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6497                 (void)RREG32(mmCP_RB0_WPTR);
6498         }
6499 }
6500
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client bit matches, i.e. until the
 * HDP flush completed.  Compute/KIQ rings use the CPn done bit for
 * their ME/pipe; the gfx ring uses CP0 and waits on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        /* only ME1/ME2 carry compute queues here */
                        return;
                }
                reg_mem_engine = 0; /* me */
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
        amdgpu_ring_write(ring, ref_and_mask); /* reference value */
        amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6533
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT
 *
 * Emits a VS partial flush followed by a VGT_FLUSH event, used before
 * reloading context state (see gfx_v8_ring_emit_cntxcntl).
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6544
6545
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - invalidate the HDP read cache
 *
 * A confirmed WRITE_DATA of 1 to HDP_DEBUG0 invalidates HDP.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0) |
                                 WR_CONFIRM));
        amdgpu_ring_write(ring, mmHDP_DEBUG0);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1);

}
6557
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 *
 * @ring: gfx ring
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (bits 24+ of the CONTROL dword)
 * @ctx_switch: unused in this implementation
 *
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, DE IBs with
 * INDIRECT_BUFFER.  Under SR-IOV, preemptible IBs set PRE_ENB.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vm_id, bool ctx_switch)
{
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        control |= ib->length_dw | (vm_id << 24);

        if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
                control |= INDIRECT_BUFFER_PRE_ENB(1);

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) | /* byte swap */
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6583
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 *
 * Like the gfx variant but always uses INDIRECT_BUFFER and marks the
 * IB valid; the VMID goes into bits 24+ of the CONTROL dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vm_id, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) | /* byte swap */
#endif
                                (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6599
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring: gfx ring
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Emits an EVENT_WRITE_EOP that flushes the TC/TCL1 caches, writes
 * @seq to @addr (32 or 64 bit) and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6620
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for the last synced fence
 *
 * Emits a WAIT_REG_MEM on the ring's own fence writeback memory so
 * following packets do not execute until sync_seq has signaled.  The
 * gfx ring waits on the PFP so the fetcher stalls as well; compute
 * rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6637
/*
 * gfx_v8_0_ring_emit_vm_flush - update a VMID's page table and flush its TLB
 *
 * @ring: ring to emit on
 * @vm_id: VMID being flushed (contexts 0-7 and 8-15 use separate
 *         register ranges)
 * @pd_addr: new page directory base address (written as a 4K page number)
 *
 * Writes the per-VMID page table base register, requests a TLB
 * invalidate for that VMID and polls VM_INVALIDATE_REQUEST until the
 * request bit clears.  On the gfx ring a PFP_SYNC_ME follows so the
 * PFP cannot run ahead with stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
                                 WR_CONFIRM);
        if (vm_id < 8) {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
        } else {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
        }
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, pd_addr >> 12);

        /* bits 0-15 are the VM contexts0-15 */
        /* invalidate the cache */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6684
6685 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6686 {
6687         return ring->adev->wb.wb[ring->wptr_offs];
6688 }
6689
6690 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6691 {
6692         struct amdgpu_device *adev = ring->adev;
6693
6694         /* XXX check if swapping is necessary on BE */
6695         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6696         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6697 }
6698
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * Compute rings use RELEASE_MEM instead of EVENT_WRITE_EOP; otherwise
 * the semantics match gfx_v8_0_ring_emit_fence_gfx(): flush caches,
 * write @seq to @addr and optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6719
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 *
 * Writes the 32-bit fence value to @addr via a confirmed WRITE_DATA
 * (memory destination) and, when AMDGPU_FENCE_FLAG_INT is set, pokes
 * CPC_INT_STATUS to raise a generic interrupt (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6744
/* Emit a SWITCH_BUFFER packet to swap the CE/DE double buffers. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6750
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 *
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* /
 *         AMDGPU_VM_DOMAIN bits
 *
 * Builds the dw2 load-enable mask for CONTEXT_CONTROL: on a context
 * switch the VGT is flushed first and the global/uconfig, cs sh-reg,
 * per-context and gfx sh-reg state groups are reloaded; CE RAM is
 * reloaded whenever a preamble IB is present.  Under SR-IOV, CE and DE
 * metadata init packets bracket the context control write.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_de_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6788
/*
 * gfx_v8_0_ring_emit_init_cond_exec - open a conditional-execution block
 *
 * Emits a COND_EXEC packet referencing the ring's cond_exe GPU address
 * with a placeholder dword count, and returns the ring offset of that
 * placeholder so gfx_v8_0_ring_emit_patch_cond_exec() can fill in the
 * real count once the guarded packets have been emitted.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6801
/*
 * gfx_v8_0_ring_emit_patch_cond_exec - close a conditional-execution block
 *
 * @offset: ring offset returned by gfx_v8_0_ring_emit_init_cond_exec()
 *
 * Replaces the 0x55aa55aa placeholder with the number of dwords emitted
 * since the COND_EXEC packet, accounting for ring-buffer wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                /* wptr wrapped past the placeholder */
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6815
6816
/*
 * gfx_v8_0_ring_emit_rreg - read a register through the ring
 *
 * Emits a COPY_DATA packet copying register @reg into the writeback
 * slot at adev->virt.reg_val_offs, where the driver picks up the value
 * (used for register access under SR-IOV).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6832
/*
 * gfx_v8_0_ring_emit_wreg - write a register through the ring
 *
 * Emits a WRITE_DATA packet that writes @val to register @reg.
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}
6842
6843 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6844                                                  enum amdgpu_interrupt_state state)
6845 {
6846         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6847                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6848 }
6849
6850 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6851                                                      int me, int pipe,
6852                                                      enum amdgpu_interrupt_state state)
6853 {
6854         /*
6855          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6856          * handles the setting of interrupts for this specific pipe. All other
6857          * pipes' interrupts are set by amdkfd.
6858          */
6859
6860         if (me == 1) {
6861                 switch (pipe) {
6862                 case 0:
6863                         break;
6864                 default:
6865                         DRM_DEBUG("invalid pipe %d\n", pipe);
6866                         return;
6867                 }
6868         } else {
6869                 DRM_DEBUG("invalid me %d\n", me);
6870                 return;
6871         }
6872
6873         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6874                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6875 }
6876
6877 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6878                                              struct amdgpu_irq_src *source,
6879                                              unsigned type,
6880                                              enum amdgpu_interrupt_state state)
6881 {
6882         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6883                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6884
6885         return 0;
6886 }
6887
6888 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6889                                               struct amdgpu_irq_src *source,
6890                                               unsigned type,
6891                                               enum amdgpu_interrupt_state state)
6892 {
6893         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6894                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6895
6896         return 0;
6897 }
6898
/*
 * Enable or disable the EOP (end-of-pipe) interrupt for the ring selected
 * by @type: either the single gfx ring or one of the MEC1/MEC2 compute
 * pipes.  Unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	/* MEC1, pipes 0-3 */
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	/* MEC2, pipes 0-3 */
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6937
6938 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6939                             struct amdgpu_irq_src *source,
6940                             struct amdgpu_iv_entry *entry)
6941 {
6942         int i;
6943         u8 me_id, pipe_id, queue_id;
6944         struct amdgpu_ring *ring;
6945
6946         DRM_DEBUG("IH: CP EOP\n");
6947         me_id = (entry->ring_id & 0x0c) >> 2;
6948         pipe_id = (entry->ring_id & 0x03) >> 0;
6949         queue_id = (entry->ring_id & 0x70) >> 4;
6950
6951         switch (me_id) {
6952         case 0:
6953                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6954                 break;
6955         case 1:
6956         case 2:
6957                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6958                         ring = &adev->gfx.compute_ring[i];
6959                         /* Per-queue interrupt is supported for MEC starting from VI.
6960                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6961                           */
6962                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6963                                 amdgpu_fence_process(ring);
6964                 }
6965                 break;
6966         }
6967         return 0;
6968 }
6969
/*
 * IH handler for privileged-register faults: a command stream accessed a
 * register it is not allowed to touch.  Runs in interrupt context, so
 * recovery is deferred to the adev->reset_work worker (presumably a GPU
 * reset — see the worker's definition elsewhere).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6978
/*
 * IH handler for privileged-instruction faults: the command stream
 * contained an instruction it may not execute.  Like the priv-reg path,
 * recovery is deferred to adev->reset_work since this runs in IRQ context.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6987
/*
 * Enable/disable the GENERIC2 interrupt used by the KIQ ring.  The enable
 * bit is set both in the global CPC interrupt control register and in the
 * per-pipe control register of whichever MEC pipe hosts the KIQ ring.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	/* this source services the KIQ ring exclusively */
	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* mirror the enable into the per-pipe INT_CNTL: the PIPE0
		 * register plus ring->pipe as register offset selects the
		 * pipe the KIQ ring actually lives on */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
7018
7019 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7020                             struct amdgpu_irq_src *source,
7021                             struct amdgpu_iv_entry *entry)
7022 {
7023         u8 me_id, pipe_id, queue_id;
7024         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7025
7026         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
7027
7028         me_id = (entry->ring_id & 0x0c) >> 2;
7029         pipe_id = (entry->ring_id & 0x03) >> 0;
7030         queue_id = (entry->ring_id & 0x70) >> 4;
7031         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7032                    me_id, pipe_id, queue_id);
7033
7034         amdgpu_fence_process(ring);
7035         return 0;
7036 }
7037
/* IP-block lifecycle callbacks for the gfx v8 block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7058
/* Ring callbacks for the graphics ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	/* worst-case dword budget for one frame's ring content */
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
7102
/* Ring callbacks for the MEC compute rings (KCQs). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword budget for one frame's ring content */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
7131
/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	/* worst-case dword budget for one frame's ring content */
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	/* register access through the ring, used e.g. under SR-IOV */
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7157
7158 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7159 {
7160         int i;
7161
7162         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7163
7164         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7165                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7166
7167         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7168                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7169 }
7170
/* IRQ source callbacks: CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7175
/* IRQ source callbacks: privileged-register fault. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7180
/* IRQ source callbacks: privileged-instruction fault. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7185
/* IRQ source callbacks: KIQ GENERIC2 interrupt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7190
/* Register the interrupt sources (EOP, priv faults, KIQ) with their handlers. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	/* priv fault sources have a single interrupt type each */
	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7205
/*
 * Hook up the RLC helpers.  NOTE(review): iceland_rlc_funcs (defined
 * elsewhere in this file) appears to be shared by all gfx v8 variants,
 * not just Iceland — confirm against the full file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7210
/*
 * Initialize the GDS (global data share) partition sizes.  Total GDS
 * memory size is read from hardware; gws/oa totals are fixed.  Partition
 * sizes differ between 64KB-GDS parts and everything else.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7238
7239 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7240                                                  u32 bitmap)
7241 {
7242         u32 data;
7243
7244         if (!bitmap)
7245                 return;
7246
7247         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7248         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7249
7250         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7251 }
7252
7253 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7254 {
7255         u32 data, mask;
7256
7257         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7258                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7259
7260         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7261
7262         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7263 }
7264
/*
 * Populate adev->gfx.cu_info: the per-SE/SH active-CU bitmaps, the total
 * number of active CUs, and the "always on" (AO) CU mask.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* user CU-disable masks, 4 SEs x 2 SHs */

	memset(cu_info, 0, sizeof(*cu_info));

	/* fetch user-requested disable masks (presumably from the
	 * amdgpu disable_cu option — confirm in amdgpu_gfx.c) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* steer register accesses at this SE/SH */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4 SEs x 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first two active CUs per
			 * SH are marked "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast (all SE/SH) steering */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7307
/* IP block descriptor for gfx v8.0 hardware. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7316
/* IP block descriptor for gfx v8.1 hardware; shares the v8.0 callbacks. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7325
/*
 * Emit a WRITE_DATA packet that initializes the CE (constant engine)
 * metadata area of the per-ring CSA at @csa_addr.  The payload is all
 * zeroes — the static union is never written — only its size differs
 * between the chained-IB and regular layouts.
 */
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
	uint64_t ce_payload_addr;
	int cnt_ce;	/* WRITE_DATA packet dword count field */
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	/* ENGINE_SEL(2)/DST_SEL(8): presumably CE engine writing to memory
	 * — confirm against the PM4 WRITE_DATA packet spec */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7352
7353 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7354 {
7355         uint64_t de_payload_addr, gds_addr;
7356         int cnt_de;
7357         static union {
7358                 struct vi_de_ib_state regular;
7359                 struct vi_de_ib_state_chained_ib chained;
7360         } de_payload = {};
7361
7362         gds_addr = csa_addr + 4096;
7363         if (ring->adev->virt.chained_ib_support) {
7364                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7365                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7366                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7367                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7368         } else {
7369                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7370                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7371                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7372                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7373         }
7374
7375         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7376         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7377                                 WRITE_DATA_DST_SEL(8) |
7378                                 WR_CONFIRM) |
7379                                 WRITE_DATA_CACHE_POLICY(0));
7380         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7381         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7382         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7383 }
7384
7385 /* create MQD for each compute queue */
7386 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7387 {
7388         struct amdgpu_ring *ring = NULL;
7389         int r, i;
7390
7391         /* create MQD for KIQ */
7392         ring = &adev->gfx.kiq.ring;
7393         if (!ring->mqd_obj) {
7394                 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7395                                             AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7396                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
7397                 if (r) {
7398                         dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7399                         return r;
7400                 }
7401
7402                 /* prepare MQD backup */
7403                 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7404                 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7405                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7406         }
7407
7408         /* create MQD for each KCQ */
7409         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7410                 ring = &adev->gfx.compute_ring[i];
7411                 if (!ring->mqd_obj) {
7412                         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7413                                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7414                                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
7415                         if (r) {
7416                                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7417                                 return r;
7418                         }
7419
7420                         /* prepare MQD backup */
7421                         adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7422                         if (!adev->gfx.mec.mqd_backup[i])
7423                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7424                 }
7425         }
7426
7427         return 0;
7428 }
7429
7430 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7431 {
7432         struct amdgpu_ring *ring = NULL;
7433         int i;
7434
7435         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7436                 ring = &adev->gfx.compute_ring[i];
7437                 kfree(adev->gfx.mec.mqd_backup[i]);
7438                 amdgpu_bo_free_kernel(&ring->mqd_obj,
7439                                       &ring->mqd_gpu_addr,
7440                                       &ring->mqd_ptr);
7441         }
7442
7443         ring = &adev->gfx.kiq.ring;
7444         kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7445         amdgpu_bo_free_kernel(&ring->mqd_obj,
7446                               &ring->mqd_gpu_addr,
7447                               &ring->mqd_ptr);
7448 }
This page took 0.489893 seconds and 4 git commands to generate.