drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

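/*
 * Program the per-ASIC "golden" register settings (values validated by
 * the hardware team), followed by the settings common to all GFX9 parts.
 */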
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if (!amdgpu_virt_support_skip_setting(adev)) {
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_0,
                                                        ARRAY_SIZE(golden_settings_gc_9_0));
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_0_vg10,
                                                        ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                }
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        default:
                break;
        }

        soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                        (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

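/*
 * Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * requesting a write confirmation (@wc).
 */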
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

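/*
 * Emit a WAIT_REG_MEM packet that polls a register (@mem_space == 0) or
 * memory location (@mem_space == 1) until (value & @mask) == @ref, with
 * @inv as the poll interval.
 */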
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

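/*
 * Basic ring test: write a magic value to a scratch register through the
 * ring and poll the register until the value reads back.
 */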
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

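/*
 * IB test: submit a small indirect buffer that writes a magic value to a
 * writeback slot, then wait on the fence and verify the value landed.
 */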
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

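/*
 * Parse the v2.1 RLC firmware header for the save/restore list CNTL,
 * GPM and SRM sub-blobs.
 */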
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

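/*
 * Record whether the CP firmware is recent enough for the driver to use
 * its register write-then-wait support (me/mec_fw_write_wait).
 */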
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000193) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA12:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000196) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA20:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000197) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_RAVEN:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000192) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        default:
                break;
        }
}

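/*
 * Disable GFXOFF on Raven parts whose RLC firmware is too old to
 * support it reliably.
 */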
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
                        break;
                if ((adev->gfx.rlc_fw_version != 106 &&
                     adev->gfx.rlc_fw_version < 531) ||
                    (adev->gfx.rlc_fw_version == 53815) ||
                    (adev->gfx.rlc_feature_version < 1) ||
                    !adev->gfx.rlc.is_rlc_v2_1)
                        adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
                break;
        default:
                break;
        }
}

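/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC microcode for the
 * current ASIC and, when the PSP loads the firmware, register each
 * image in the firmware list.
 */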
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL;
        unsigned int i = 0;
        uint16_t version_major;
        uint16_t version_minor;
        uint32_t smu_version;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                chip_name = "vega10";
                break;
        case CHIP_VEGA12:
                chip_name = "vega12";
                break;
        case CHIP_VEGA20:
                chip_name = "vega20";
                break;
        case CHIP_RAVEN:
                if (adev->rev_id >= 8)
                        chip_name = "raven2";
                else if (adev->pdev->device == 0x15d8)
                        chip_name = "picasso";
                else
                        chip_name = "raven";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
         * instead of picasso_rlc.bin.
         * Detection:
         * PCO AM4: revision >= 0xC8 && revision <= 0xCF
         *          or revision >= 0xD8 && revision <= 0xDF
         * otherwise it is PCO FP5
         */
        if (!strcmp(chip_name, "picasso") &&
                (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
                ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
        else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
                (smu_version >= 0x41e2b))
                /*
                 * On APUs the SMC is loaded by the SBIOS, so the SMU
                 * version can be read directly.
                 */
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
        else
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

        version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
        version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
        if (version_major == 2 && version_minor == 1)
                adev->gfx.rlc.is_rlc_v2_1 = true;

        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);
        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->gfx.rlc.is_rlc_v2_1)
                gfx_v9_0_init_rlc_ext_microcode(adev);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        if (!err) {
                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                if (err)
                        goto out;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec2_fw->data;
                adev->gfx.mec2_fw_version =
                        le32_to_cpu(cp_hdr->header.ucode_version);
                adev->gfx.mec2_feature_version =
                        le32_to_cpu(cp_hdr->ucode_feature_version);
        } else {
                err = 0;
                adev->gfx.mec2_fw = NULL;
        }

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.rlc.is_rlc_v2_1 &&
                    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
                    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
                    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
                        info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
                        info->fw = adev->gfx.rlc_fw;
                        adev->firmware.fw_size +=
                                ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
                }

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
                info->fw = adev->gfx.mec_fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
                        info->fw = adev->gfx.mec2_fw;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
                }
        }

out:
        gfx_v9_0_check_if_need_gfxoff(adev);
        gfx_v9_0_check_fw_write_wait(adev);
        if (err) {
                dev_err(adev->dev,
                        "gfx9: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

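/* Compute the size of the clear-state buffer (CSB) in dwords. */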
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
        u32 count = 0;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        /* begin clear state */
        count += 2;
        /* context control state */
        count += 3;

        for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT)
                                count += 2 + ext->reg_count;
                        else
                                return 0;
                }
        }

        /* end clear state */
        count += 2;
        /* clear state */
        count += 2;

        return count;
}

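/*
 * Build the clear-state indirect buffer: preamble begin/end, context
 * control, and the SECT_CONTEXT register extents from the gfx9
 * clear-state definitions.
 */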
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}

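/*
 * Program, per SE/SH, the bitmasks of CUs that must remain powered on
 * for power gating and load balancing.
 */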
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        uint32_t pg_always_on_cu_num = 2;
        uint32_t always_on_cu_num;
        uint32_t i, j, k;
        uint32_t mask, cu_bitmap, counter;

        if (adev->flags & AMD_IS_APU)
                always_on_cu_num = 4;
        else if (adev->asic_type == CHIP_VEGA12)
                always_on_cu_num = 8;
        else
                always_on_cu_num = 12;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        cu_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (cu_info->bitmap[i][j] & mask) {
                                        if (counter == pg_always_on_cu_num)
                                                WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
                                        if (counter < always_on_cu_num)
                                                cu_bitmap |= mask;
                                        else
                                                break;
                                        counter++;
                                }
                                mask <<= 1;
                        }

                        WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
                        cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
                }
        }
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}

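/*
 * Configure load balancing per watt (LBPW) for Raven;
 * gfx_v9_4_init_lbpw() below is the Vega20 variant.
 */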
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
         * but used here for RLC_LB_CNTL configuration */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
        uint32_t data;

        /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
        WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

        /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

        /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
        data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
        data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
        WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

        /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
        data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
        data &= 0x0000FFFF;
        data |= 0x00C00000;
        WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

        /*
         * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
         * programmed in gfx_v9_0_init_always_on_cu_mask()
         */

        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
         * but used here for RLC_LB_CNTL configuration */
        data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
        data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
        WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
        return 5;
}

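/*
 * Allocate the RLC resources: the clear-state BO and, on Raven, the CP
 * jump table, then apply the per-ASIC LBPW setup.
 */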
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117         const struct cs_section_def *cs_data;
1118         int r;
1119
1120         adev->gfx.rlc.cs_data = gfx9_cs_data;
1121
1122         cs_data = adev->gfx.rlc.cs_data;
1123
1124         if (cs_data) {
1125                 /* init clear state block */
1126                 r = amdgpu_gfx_rlc_init_csb(adev);
1127                 if (r)
1128                         return r;
1129         }
1130
1131         if (adev->asic_type == CHIP_RAVEN) {
1132                 /* TODO: double check the cp_table_size for RV */
1133                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134                 r = amdgpu_gfx_rlc_init_cpt(adev);
1135                 if (r)
1136                         return r;
1137         }
1138
1139         switch (adev->asic_type) {
1140         case CHIP_RAVEN:
1141                 gfx_v9_0_init_lbpw(adev);
1142                 break;
1143         case CHIP_VEGA20:
1144                 gfx_v9_4_init_lbpw(adev);
1145                 break;
1146         default:
1147                 break;
1148         }
1149
1150         return 0;
1151 }
1152
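/* Pin the clear-state buffer (CSB) in VRAM and cache its GPU address;
 * the buffer must stay resident while the RLC references it through
 * mmRLC_CSIB_ADDR_LO/HI (programmed in gfx_v9_0_init_csb()).
 */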
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158         if (unlikely(r != 0))
1159                 return r;
1160
1161         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162                         AMDGPU_GEM_DOMAIN_VRAM);
1163         if (!r)
1164                 adev->gfx.rlc.clear_state_gpu_addr =
1165                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166
1167         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168
1169         return r;
1170 }
1171
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174         int r;
1175
1176         if (!adev->gfx.rlc.clear_state_obj)
1177                 return;
1178
1179         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180         if (likely(r == 0)) {
1181                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183         }
1184 }
1185
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191
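/* Allocate the MEC HPD EOP buffer (one GFX9_MEC_HPD_SIZE chunk per enabled
 * compute ring) and stage a copy of the MEC ucode in a GTT buffer object so
 * the CP can fetch it from mec_fw_gpu_addr.
 */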
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194         int r;
1195         u32 *hpd;
1196         const __le32 *fw_data;
1197         unsigned fw_size;
1198         u32 *fw;
1199         size_t mec_hpd_size;
1200
1201         const struct gfx_firmware_header_v1_0 *mec_hdr;
1202
1203         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204
1205         /* take ownership of the relevant compute queues */
1206         amdgpu_gfx_compute_queue_acquire(adev);
1207         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208
1209         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210                                       AMDGPU_GEM_DOMAIN_VRAM,
1211                                       &adev->gfx.mec.hpd_eop_obj,
1212                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1213                                       (void **)&hpd);
1214         if (r) {
1215                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216                 gfx_v9_0_mec_fini(adev);
1217                 return r;
1218         }
1219
1220         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221
1222         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224
1225         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226
1227         fw_data = (const __le32 *)
1228                 (adev->gfx.mec_fw->data +
1229                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1231
1232         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234                                       &adev->gfx.mec.mec_fw_obj,
1235                                       &adev->gfx.mec.mec_fw_gpu_addr,
1236                                       (void **)&fw);
1237         if (r) {
1238                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239                 gfx_v9_0_mec_fini(adev);
1240                 return r;
1241         }
1242
1243         memcpy(fw, fw_data, fw_size);
1244
1245         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247
1248         return 0;
1249 }
1250
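/* Wave debug helpers: SQ_IND_INDEX selects a register slot for a given
 * (SIMD, wave) pair and SQ_IND_DATA reads it back; the AUTO_INCR variant
 * lets wave_read_regs() stream whole SGPR/VGPR ranges. These back the
 * read_wave_* entries in gfx_v9_0_gfx_funcs used by the wave debug interface.
 */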
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1257                 (SQ_IND_INDEX__FORCE_READ_MASK));
1258         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262                            uint32_t wave, uint32_t thread,
1263                            uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1271                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272         while (num--)
1273                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278         /* type 1 wave data */
1279         dst[(*no_fields)++] = 1;
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297                                      uint32_t wave, uint32_t start,
1298                                      uint32_t size, uint32_t *dst)
1299 {
1300         wave_read_regs(
1301                 adev, simd, wave, 0,
1302                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306                                      uint32_t wave, uint32_t thread,
1307                                      uint32_t start, uint32_t size,
1308                                      uint32_t *dst)
1309 {
1310         wave_read_regs(
1311                 adev, simd, wave, thread,
1312                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316                                   u32 me, u32 pipe, u32 q)
1317 {
1318         soc15_grbm_select(adev, me, pipe, q, 0);
1319 }
1320
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323         .select_se_sh = &gfx_v9_0_select_se_sh,
1324         .read_wave_data = &gfx_v9_0_read_wave_data,
1325         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332         u32 gb_addr_config;
1333         int err;
1334
1335         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336
1337         switch (adev->asic_type) {
1338         case CHIP_VEGA10:
1339                 adev->gfx.config.max_hw_contexts = 8;
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_VEGA12:
1347                 adev->gfx.config.max_hw_contexts = 8;
1348                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353                 DRM_INFO("fix gfx.config for vega12\n");
1354                 break;
1355         case CHIP_VEGA20:
1356                 adev->gfx.config.max_hw_contexts = 8;
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362                 gb_addr_config &= ~0xf3e777ff;
1363                 gb_addr_config |= 0x22014042;
1364                 /* fall back to the vbios table if gpu info fw is unavailable */
1365                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1366                 if (err)
1367                         return err;
1368                 break;
1369         case CHIP_RAVEN:
1370                 adev->gfx.config.max_hw_contexts = 8;
1371                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375                 if (adev->rev_id >= 8)
1376                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377                 else
1378                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379                 break;
1380         default:
1381                 BUG();
1382                 break;
1383         }
1384
1385         adev->gfx.config.gb_addr_config = gb_addr_config;
1386
1387         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388                         REG_GET_FIELD(
1389                                         adev->gfx.config.gb_addr_config,
1390                                         GB_ADDR_CONFIG,
1391                                         NUM_PIPES);
1392
1393         adev->gfx.config.max_tile_pipes =
1394                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395
1396         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_BANKS);
1401         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         MAX_COMPRESSED_FRAGS);
1406         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407                         REG_GET_FIELD(
1408                                         adev->gfx.config.gb_addr_config,
1409                                         GB_ADDR_CONFIG,
1410                                         NUM_RB_PER_SE);
1411         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412                         REG_GET_FIELD(
1413                                         adev->gfx.config.gb_addr_config,
1414                                         GB_ADDR_CONFIG,
1415                                         NUM_SHADER_ENGINES);
1416         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417                         REG_GET_FIELD(
1418                                         adev->gfx.config.gb_addr_config,
1419                                         GB_ADDR_CONFIG,
1420                                         PIPE_INTERLEAVE_SIZE));
1421
1422         return 0;
1423 }
1424
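/* NGG (next-generation geometry) buffer management. Per-SE sizes come from
 * the amdgpu_*_buf_per_se module parameters; a value of 0 selects the
 * caller-supplied default, and the total allocation is scaled by the number
 * of shader engines.
 */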
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426                                    struct amdgpu_ngg_buf *ngg_buf,
1427                                    int size_se,
1428                                    int default_size_se)
1429 {
1430         int r;
1431
1432         if (size_se < 0) {
1433                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434                 return -EINVAL;
1435         }
1436         size_se = size_se ? size_se : default_size_se;
1437
1438         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441                                     &ngg_buf->bo,
1442                                     &ngg_buf->gpu_addr,
1443                                     NULL);
1444         if (r) {
1445                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446                 return r;
1447         }
1448         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449
1450         return r;
1451 }
1452
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455         int i;
1456
1457         for (i = 0; i < NGG_BUF_MAX; i++)
1458                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459                                       &adev->gfx.ngg.buf[i].gpu_addr,
1460                                       NULL);
1461
1462         memset(&adev->gfx.ngg.buf[0], 0,
1463                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464
1465         adev->gfx.ngg.init = false;
1466
1467         return 0;
1468 }
1469
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472         int r;
1473
1474         if (!amdgpu_ngg || adev->gfx.ngg.init)
1475                 return 0;
1476
1477         /* GDS reserve memory: 64-byte alignment */
1478         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482
1483         /* Primitive Buffer */
1484         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485                                     amdgpu_prim_buf_per_se,
1486                                     64 * 1024);
1487         if (r) {
1488                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489                 goto err;
1490         }
1491
1492         /* Position Buffer */
1493         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494                                     amdgpu_pos_buf_per_se,
1495                                     256 * 1024);
1496         if (r) {
1497                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1498                 goto err;
1499         }
1500
1501         /* Control Sideband */
1502         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503                                     amdgpu_cntl_sb_buf_per_se,
1504                                     256);
1505         if (r) {
1506                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507                 goto err;
1508         }
1509
1510         /* Parameter Cache, not created by default */
1511         if (amdgpu_param_buf_per_se <= 0)
1512                 goto out;
1513
1514         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515                                     amdgpu_param_buf_per_se,
1516                                     512 * 1024);
1517         if (r) {
1518                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519                 goto err;
1520         }
1521
1522 out:
1523         adev->gfx.ngg.init = true;
1524         return 0;
1525 err:
1526         gfx_v9_0_ngg_fini(adev);
1527         return r;
1528 }
1529
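/* Enable NGG: program the WD buffer sizes and base addresses, then zero the
 * GDS reserve area with a DMA_DATA packet (SRC_SEL(2) = embedded data,
 * DST_SEL(1) = GDS). GDS_VMID0_SIZE is grown to cover the reserved region
 * while the clear runs and is written back to 0 afterwards.
 */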
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533         int r;
1534         u32 data, base;
1535
1536         if (!amdgpu_ngg)
1537                 return 0;
1538
1539         /* Program buffer size */
1540         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545
1546         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551
1552         /* Program buffer base address */
1553         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556
1557         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560
1561         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564
1565         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568
1569         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572
1573         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576
1577         /* Clear GDS reserved memory */
1578         r = amdgpu_ring_alloc(ring, 17);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581                           ring->name, r);
1582                 return r;
1583         }
1584
1585         gfx_v9_0_write_data_to_reg(ring, 0, false,
1586                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587                                    (adev->gds.gds_size +
1588                                     adev->gfx.ngg.gds_reserve_size));
1589
1590         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592                                 PACKET3_DMA_DATA_DST_SEL(1) |
1593                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, 0);
1596         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597         amdgpu_ring_write(ring, 0);
1598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599                                 adev->gfx.ngg.gds_reserve_size);
1600
1601         gfx_v9_0_write_data_to_reg(ring, 0, false,
1602                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603
1604         amdgpu_ring_commit(ring);
1605
1606         return 0;
1607 }
1608
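/* Per-ring compute setup: each ring gets its own doorbell (the index is
 * shifted left one bit for 64-bit doorbell slots), a GFX9_MEC_HPD_SIZE
 * slice of the EOP buffer, and an EOP interrupt line derived from its
 * MEC/pipe position.
 */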
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610                                       int mec, int pipe, int queue)
1611 {
1612         int r;
1613         unsigned irq_type;
1614         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1615
1618         /* mec0 is me1 */
1619         ring->me = mec + 1;
1620         ring->pipe = pipe;
1621         ring->queue = queue;
1622
1623         ring->ring_obj = NULL;
1624         ring->use_doorbell = true;
1625         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1628         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629
1630         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632                 + ring->pipe;
1633
1634         /* type-2 packets are deprecated on MEC, use type-3 instead */
1635         r = amdgpu_ring_init(adev, ring, 1024,
1636                              &adev->gfx.eop_irq, irq_type);
1637         if (r)
1638                 return r;
1639
1641         return 0;
1642 }
1643
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646         int i, j, k, r, ring_id;
1647         struct amdgpu_ring *ring;
1648         struct amdgpu_kiq *kiq;
1649         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650
1651         switch (adev->asic_type) {
1652         case CHIP_VEGA10:
1653         case CHIP_VEGA12:
1654         case CHIP_VEGA20:
1655         case CHIP_RAVEN:
1656                 adev->gfx.mec.num_mec = 2;
1657                 break;
1658         default:
1659                 adev->gfx.mec.num_mec = 1;
1660                 break;
1661         }
1662
1663         adev->gfx.mec.num_pipe_per_mec = 4;
1664         adev->gfx.mec.num_queue_per_pipe = 8;
1665
1666         /* EOP Event */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668         if (r)
1669                 return r;
1670
1671         /* Privileged reg */
1672         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673                               &adev->gfx.priv_reg_irq);
1674         if (r)
1675                 return r;
1676
1677         /* Privileged inst */
1678         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679                               &adev->gfx.priv_inst_irq);
1680         if (r)
1681                 return r;
1682
1683         /* ECC error */
1684         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685                               &adev->gfx.cp_ecc_error_irq);
1686         if (r)
1687                 return r;
1688
1689         /* FUE error */
1690         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691                               &adev->gfx.cp_ecc_error_irq);
1692         if (r)
1693                 return r;
1694
1695         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696
1697         gfx_v9_0_scratch_init(adev);
1698
1699         r = gfx_v9_0_init_microcode(adev);
1700         if (r) {
1701                 DRM_ERROR("Failed to load gfx firmware!\n");
1702                 return r;
1703         }
1704
1705         r = adev->gfx.rlc.funcs->init(adev);
1706         if (r) {
1707                 DRM_ERROR("Failed to init rlc BOs!\n");
1708                 return r;
1709         }
1710
1711         r = gfx_v9_0_mec_init(adev);
1712         if (r) {
1713                 DRM_ERROR("Failed to init MEC BOs!\n");
1714                 return r;
1715         }
1716
1717         /* set up the gfx ring */
1718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719                 ring = &adev->gfx.gfx_ring[i];
1720                 ring->ring_obj = NULL;
1721                 if (!i)
1722                         sprintf(ring->name, "gfx");
1723                 else
1724                         sprintf(ring->name, "gfx_%d", i);
1725                 ring->use_doorbell = true;
1726                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727                 r = amdgpu_ring_init(adev, ring, 1024,
1728                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729                 if (r)
1730                         return r;
1731         }
1732
1733         /* set up the compute queues - allocate horizontally across pipes */
1734         ring_id = 0;
1735         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739                                         continue;
1740
1741                                 r = gfx_v9_0_compute_ring_init(adev,
1742                                                                ring_id,
1743                                                                i, k, j);
1744                                 if (r)
1745                                         return r;
1746
1747                                 ring_id++;
1748                         }
1749                 }
1750         }
1751
1752         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753         if (r) {
1754                 DRM_ERROR("Failed to init KIQ BOs!\n");
1755                 return r;
1756         }
1757
1758         kiq = &adev->gfx.kiq;
1759         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760         if (r)
1761                 return r;
1762
1763         /* create MQDs for all compute queues as well as the KIQ for the SRIOV case */
1764         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765         if (r)
1766                 return r;
1767
1768         adev->gfx.ce_ram_size = 0x8000;
1769
1770         r = gfx_v9_0_gpu_early_init(adev);
1771         if (r)
1772                 return r;
1773
1774         r = gfx_v9_0_ngg_init(adev);
1775         if (r)
1776                 return r;
1777
1778         return 0;
1779 }
1780
1781
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784         int i;
1785         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786
1787         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788                         adev->gfx.ras_if) {
1789                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790                 struct ras_ih_if ih_info = {
1791                         .head = *ras_if,
1792                 };
1793
1794                 amdgpu_ras_debugfs_remove(adev, ras_if);
1795                 amdgpu_ras_sysfs_remove(adev, ras_if);
1796                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1798                 kfree(ras_if);
1799         }
1800
1801         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805
1806         amdgpu_gfx_mqd_sw_fini(adev);
1807         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808         amdgpu_gfx_kiq_fini(adev);
1809
1810         gfx_v9_0_mec_fini(adev);
1811         gfx_v9_0_ngg_fini(adev);
1812         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813         if (adev->asic_type == CHIP_RAVEN) {
1814                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815                                 &adev->gfx.rlc.cp_table_gpu_addr,
1816                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1817         }
1818         gfx_v9_0_free_microcode(adev);
1819
1820         return 0;
1821 }
1822
1823
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826         /* TODO */
1827 }
1828
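/* Steer subsequent register accesses to one shader engine / shader array /
 * instance via GRBM_GFX_INDEX; 0xffffffff selects broadcast mode. Callers
 * in this file hold grbm_idx_mutex around select/access/deselect sequences.
 */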
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831         u32 data;
1832
1833         if (instance == 0xffffffff)
1834                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835         else
1836                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837
1838         if (se_num == 0xffffffff)
1839                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840         else
1841                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842
1843         if (sh_num == 0xffffffff)
1844                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845         else
1846                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847
1848         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853         u32 data, mask;
1854
1855         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857
1858         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860
1861         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862                                          adev->gfx.config.max_sh_per_se);
1863
1864         return (~data) & mask;
1865 }
1866
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869         int i, j;
1870         u32 data;
1871         u32 active_rbs = 0;
1872         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873                                         adev->gfx.config.max_sh_per_se;
1874
1875         mutex_lock(&adev->grbm_idx_mutex);
1876         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1880                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881                                                rb_bitmap_width_per_sh);
1882                 }
1883         }
1884         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885         mutex_unlock(&adev->grbm_idx_mutex);
1886
1887         adev->gfx.config.backend_enable_mask = active_rbs;
1888         adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890
1891 #define DEFAULT_SH_MEM_BASES    (0x6000)
1892 #define FIRST_COMPUTE_VMID      (8)
1893 #define LAST_COMPUTE_VMID       (16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896         int i;
1897         uint32_t sh_mem_config;
1898         uint32_t sh_mem_bases;
1899
1900         /*
1901          * Configure apertures:
1902          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905          */
1906         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907
1908         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911
1912         mutex_lock(&adev->srbm_mutex);
1913         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914                 soc15_grbm_select(adev, 0, 0, 0, i);
1915                 /* CP and shaders */
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918         }
1919         soc15_grbm_select(adev, 0, 0, 0, 0);
1920         mutex_unlock(&adev->srbm_mutex);
1921 }
1922
1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1924 {
1925         u32 tmp;
1926         int i;
1927
1928         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1929
1930         gfx_v9_0_tiling_mode_table_init(adev);
1931
1932         gfx_v9_0_setup_rb(adev);
1933         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1934         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1935
1936         /* XXX SH_MEM regs */
1937         /* where to put LDS, scratch, GPUVM in FSA64 space */
1938         mutex_lock(&adev->srbm_mutex);
1939         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1940                 soc15_grbm_select(adev, 0, 0, 0, i);
1941                 /* CP and shaders */
1942                 if (i == 0) {
1943                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1944                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1945                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1946                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1947                 } else {
1948                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1949                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1950                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1951                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1952                                 (adev->gmc.private_aperture_start >> 48));
1953                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1954                                 (adev->gmc.shared_aperture_start >> 48));
1955                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1956                 }
1957         }
1958         soc15_grbm_select(adev, 0, 0, 0, 0);
1959
1960         mutex_unlock(&adev->srbm_mutex);
1961
1962         gfx_v9_0_init_compute_vmid(adev);
1963 }
1964
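/* Wait for the RLC serdes to go idle: first poll the per-CU master busy
 * bits on every SE/SH, then the non-CU (SE/GC/TC) masters, giving up after
 * adev->usec_timeout microseconds per register.
 */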
1965 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1966 {
1967         u32 i, j, k;
1968         u32 mask;
1969
1970         mutex_lock(&adev->grbm_idx_mutex);
1971         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1972                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1973                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1974                         for (k = 0; k < adev->usec_timeout; k++) {
1975                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1976                                         break;
1977                                 udelay(1);
1978                         }
1979                         if (k == adev->usec_timeout) {
1980                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1981                                                       0xffffffff, 0xffffffff);
1982                                 mutex_unlock(&adev->grbm_idx_mutex);
1983                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1984                                          i, j);
1985                                 return;
1986                         }
1987                 }
1988         }
1989         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1990         mutex_unlock(&adev->grbm_idx_mutex);
1991
1992         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1993                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1994                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1995                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1996         for (k = 0; k < adev->usec_timeout; k++) {
1997                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
1998                         break;
1999                 udelay(1);
2000         }
2001 }
2002
2003 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2004                                                bool enable)
2005 {
2006         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2007
2008         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2009         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2010         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2011         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2012
2013         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2014 }
2015
2016 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2017 {
2018         /* csib */
2019         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2020                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2021         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2022                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2023         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2024                         adev->gfx.rlc.clear_state_size);
2025 }
2026
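/* The RLC save/restore list format is a run of direct registers followed by
 * indirect blocks, each terminated with 0xFFFFFFFF. This pass records where
 * every indirect block starts and collects the unique indirect register
 * addresses into unique_indirect_regs[].
 */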
2027 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2028                                 int indirect_offset,
2029                                 int list_size,
2030                                 int *unique_indirect_regs,
2031                                 int unique_indirect_reg_count,
2032                                 int *indirect_start_offsets,
2033                                 int *indirect_start_offsets_count,
2034                                 int max_start_offsets_count)
2035 {
2036         int idx;
2037
2038         for (; indirect_offset < list_size; indirect_offset++) {
2039                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2040                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2041                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2042
2043                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2044                         indirect_offset += 2;
2045
2046                         /* look for the matching index */
2047                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2048                                 if (unique_indirect_regs[idx] ==
2049                                         register_list_format[indirect_offset] ||
2050                                         !unique_indirect_regs[idx])
2051                                         break;
2052                         }
2053
2054                         BUG_ON(idx >= unique_indirect_reg_count);
2055
2056                         if (!unique_indirect_regs[idx])
2057                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2058
2059                         indirect_offset++;
2060                 }
2061         }
2062 }
2063
2064 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2065 {
2066         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2067         int unique_indirect_reg_count = 0;
2068
2069         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2070         int indirect_start_offsets_count = 0;
2071
2072         int list_size = 0;
2073         int i = 0, j = 0;
2074         u32 tmp = 0;
2075
2076         u32 *register_list_format =
2077                 kmemdup(adev->gfx.rlc.register_list_format,
2078                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2079         if (!register_list_format)
2080                 return -ENOMEM;
2081
2082         /* setup unique_indirect_regs array and indirect_start_offsets array */
2083         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2084         gfx_v9_1_parse_ind_reg_list(register_list_format,
2085                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2086                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2087                                     unique_indirect_regs,
2088                                     unique_indirect_reg_count,
2089                                     indirect_start_offsets,
2090                                     &indirect_start_offsets_count,
2091                                     ARRAY_SIZE(indirect_start_offsets));
2092
2093         /* enable auto inc in case it is disabled */
2094         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2095         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2096         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2097
2098         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2099         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2100                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2101         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2102                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2103                         adev->gfx.rlc.register_restore[i]);
2104
2105         /* load indirect register */
2106         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2107                 adev->gfx.rlc.reg_list_format_start);
2108
2109         /* direct register portion */
2110         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2111                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2112                         register_list_format[i]);
2113
2114         /* indirect register portion */
2115         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2116                 if (register_list_format[i] == 0xFFFFFFFF) {
2117                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2118                         continue;
2119                 }
2120
2121                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2122                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2123
2124                 for (j = 0; j < unique_indirect_reg_count; j++) {
2125                         if (register_list_format[i] == unique_indirect_regs[j]) {
2126                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2127                                 break;
2128                         }
2129                 }
2130
2131                 BUG_ON(j >= unique_indirect_reg_count);
2132
2133                 i++;
2134         }
2135
2136         /* set save/restore list size */
2137         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2138         list_size = list_size >> 1;
2139         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2140                 adev->gfx.rlc.reg_restore_list_size);
2141         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2142
2143         /* write the starting offsets to RLC scratch ram */
2144         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2145                 adev->gfx.rlc.starting_offsets_start);
2146         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2147                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2148                        indirect_start_offsets[i]);
2149
2150         /* load unique indirect regs */
2151         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2152                 if (unique_indirect_regs[i] != 0) {
2153                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2154                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2155                                unique_indirect_regs[i] & 0x3FFFF);
2156
2157                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2158                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2159                                unique_indirect_regs[i] >> 20);
2160                 }
2161         }
2162
2163         kfree(register_list_format);
2164         return 0;
2165 }
2166
2167 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2168 {
2169         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2170 }
2171
2172 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2173                                              bool enable)
2174 {
2175         uint32_t data = 0;
2176         uint32_t default_data = 0;
2177
2178         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2179         if (enable) {
2180                 /* enable GFXIP control over CGPG */
2181                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2182                 if (default_data != data)
2183                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2184
2185                 /* update status */
2186                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2187                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2188                 if (default_data != data)
2189                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2190         } else {
2191                 /* restore GFXIP control over CGPG */
2192                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2193                 if (default_data != data)
2194                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2195         }
2196 }
2197
2198 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2199 {
2200         uint32_t data = 0;
2201
2202         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2203                               AMD_PG_SUPPORT_GFX_SMG |
2204                               AMD_PG_SUPPORT_GFX_DMG)) {
2205                 /* init IDLE_POLL_COUNT = 0x60 */
2206                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2207                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2208                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2209                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2210
2211                 /* init RLC PG Delay */
2212                 data = 0;
2213                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2214                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2215                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2216                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2217                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2218
2219                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2220                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2221                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2222                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2223
2224                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2225                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2226                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2227                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2228
2229                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2230                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2231
2232                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2233                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2234                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2235
2236                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2237         }
2238 }
2239
2240 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2241                                                 bool enable)
2242 {
2243         uint32_t data = 0;
2244         uint32_t default_data = 0;
2245
2246         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2247         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2248                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2249                              enable ? 1 : 0);
2250         if (default_data != data)
2251                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2252 }
2253
2254 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2255                                                 bool enable)
2256 {
2257         uint32_t data = 0;
2258         uint32_t default_data = 0;
2259
2260         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2261         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2262                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2263                              enable ? 1 : 0);
2264         if (default_data != data)
2265                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2266 }
2267
2268 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2269                                         bool enable)
2270 {
2271         uint32_t data = 0;
2272         uint32_t default_data = 0;
2273
2274         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2275         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2276                              CP_PG_DISABLE,
2277                              enable ? 0 : 1);
2278         if (default_data != data)
2279                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2280 }
2281
2282 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2283                                                 bool enable)
2284 {
2285         uint32_t data, default_data;
2286
2287         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2288         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2289                              GFX_POWER_GATING_ENABLE,
2290                              enable ? 1 : 0);
2291         if (default_data != data)
2292                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2293 }
2294
2295 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2296                                                 bool enable)
2297 {
2298         uint32_t data, default_data;
2299
2300         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2301         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2302                              GFX_PIPELINE_PG_ENABLE,
2303                              enable ? 1 : 0);
2304         if (default_data != data)
2305                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2306
2307         if (!enable)
2308                 /* read any GFX register to wake up GFX */
2309                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2310 }
2311
2312 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2313                                                        bool enable)
2314 {
2315         uint32_t data, default_data;
2316
2317         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2318         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2319                              STATIC_PER_CU_PG_ENABLE,
2320                              enable ? 1 : 0);
2321         if (default_data != data)
2322                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2323 }
2324
2325 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2326                                                 bool enable)
2327 {
2328         uint32_t data, default_data;
2329
2330         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2331         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2332                              DYN_PER_CU_PG_ENABLE,
2333                              enable ? 1 : 0);
2334         if (default_data != data)
2335                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2336 }
2337
2338 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2339 {
2340         gfx_v9_0_init_csb(adev);
2341
2342         /*
2343          * The RLC save/restore list is supported from RLC v2.1 on,
2344          * and is required by the gfxoff feature.
2345          */
2346         if (adev->gfx.rlc.is_rlc_v2_1) {
2347                 gfx_v9_1_init_rlc_save_restore_list(adev);
2348                 gfx_v9_0_enable_save_restore_machine(adev);
2349         }
2350
2351         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2352                               AMD_PG_SUPPORT_GFX_SMG |
2353                               AMD_PG_SUPPORT_GFX_DMG |
2354                               AMD_PG_SUPPORT_CP |
2355                               AMD_PG_SUPPORT_GDS |
2356                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2357                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2358                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2359                 gfx_v9_0_init_gfx_power_gating(adev);
2360         }
2361 }
2362
2363 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2364 {
2365         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2366         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2367         gfx_v9_0_wait_for_rlc_serdes(adev);
2368 }
2369
2370 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2371 {
2372         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2373         udelay(50);
2374         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2375         udelay(50);
2376 }
2377
2378 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2379 {
2380 #ifdef AMDGPU_RLC_DEBUG_RETRY
2381         u32 rlc_ucode_ver;
2382 #endif
2383
2384         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2385         udelay(50);
2386
2387         /* on APUs (e.g. carrizo) the cp interrupt is enabled after cp init */
2388         if (!(adev->flags & AMD_IS_APU)) {
2389                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2390                 udelay(50);
2391         }
2392
2393 #ifdef AMDGPU_RLC_DEBUG_RETRY
2394         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2395         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2396         if (rlc_ucode_ver == 0x108) {
2397                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2398                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2399                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2400                  * default is 0x9C4 to create a 100us interval */
2401                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2402                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2403                  * to disable the page fault retry interrupts, default is
2404                  * 0x100 (256) */
2405                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2406         }
2407 #endif
2408 }
2409
2410 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2411 {
2412         const struct rlc_firmware_header_v2_0 *hdr;
2413         const __le32 *fw_data;
2414         unsigned i, fw_size;
2415
2416         if (!adev->gfx.rlc_fw)
2417                 return -EINVAL;
2418
2419         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2420         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2421
2422         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2423                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2424         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2425
2426         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2427                         RLCG_UCODE_LOADING_START_ADDRESS);
2428         for (i = 0; i < fw_size; i++)
2429                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2430         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2431
2432         return 0;
2433 }
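/*
 * The loader above follows the usual amdgpu ADDR/DATA pattern: UCODE_ADDR
 * is pointed at the load start once, each UCODE_DATA write then advances
 * the internal address automatically, and the final write of the firmware
 * version into UCODE_ADDR mirrors the other ucode loaders in this file
 * (presumably letting the block latch the loaded version).
 */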
2434
2435 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2436 {
2437         int r;
2438
2439         if (amdgpu_sriov_vf(adev)) {
2440                 gfx_v9_0_init_csb(adev);
2441                 return 0;
2442         }
2443
2444         adev->gfx.rlc.funcs->stop(adev);
2445
2446         /* disable CG */
2447         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2448
2449         gfx_v9_0_init_pg(adev);
2450
2451         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2452                 /* legacy rlc firmware loading */
2453                 r = gfx_v9_0_rlc_load_microcode(adev);
2454                 if (r)
2455                         return r;
2456         }
2457
2458         switch (adev->asic_type) {
2459         case CHIP_RAVEN:
2460                 if (amdgpu_lbpw == 0)
2461                         gfx_v9_0_enable_lbpw(adev, false);
2462                 else
2463                         gfx_v9_0_enable_lbpw(adev, true);
2464                 break;
2465         case CHIP_VEGA20:
2466                 if (amdgpu_lbpw > 0)
2467                         gfx_v9_0_enable_lbpw(adev, true);
2468                 else
2469                         gfx_v9_0_enable_lbpw(adev, false);
2470                 break;
2471         default:
2472                 break;
2473         }
2474
2475         adev->gfx.rlc.funcs->start(adev);
2476
2477         return 0;
2478 }
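/*
 * Note the asymmetric handling of the amdgpu_lbpw module parameter above:
 * on Raven, LBPW is enabled unless the parameter is explicitly 0 (so the
 * "auto" value of -1 enables it), whereas on Vega20 it is enabled only
 * when the parameter is explicitly positive.
 */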
2479
2480 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2481 {
2482         int i;
2483         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2484
2485         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2486         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2487         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2488         if (!enable) {
2489                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2490                         adev->gfx.gfx_ring[i].sched.ready = false;
2491         }
2492         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2493         udelay(50);
2494 }
2495
2496 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2497 {
2498         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2499         const struct gfx_firmware_header_v1_0 *ce_hdr;
2500         const struct gfx_firmware_header_v1_0 *me_hdr;
2501         const __le32 *fw_data;
2502         unsigned i, fw_size;
2503
2504         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2505                 return -EINVAL;
2506
2507         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2508                 adev->gfx.pfp_fw->data;
2509         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2510                 adev->gfx.ce_fw->data;
2511         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2512                 adev->gfx.me_fw->data;
2513
2514         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2515         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2516         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2517
2518         gfx_v9_0_cp_gfx_enable(adev, false);
2519
2520         /* PFP */
2521         fw_data = (const __le32 *)
2522                 (adev->gfx.pfp_fw->data +
2523                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2524         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2525         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2526         for (i = 0; i < fw_size; i++)
2527                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2528         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2529
2530         /* CE */
2531         fw_data = (const __le32 *)
2532                 (adev->gfx.ce_fw->data +
2533                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2534         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2535         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2536         for (i = 0; i < fw_size; i++)
2537                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2538         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2539
2540         /* ME */
2541         fw_data = (const __le32 *)
2542                 (adev->gfx.me_fw->data +
2543                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2544         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2545         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2546         for (i = 0; i < fw_size; i++)
2547                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2548         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2549
2550         return 0;
2551 }
2552
2553 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2554 {
2555         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2556         const struct cs_section_def *sect = NULL;
2557         const struct cs_extent_def *ext = NULL;
2558         int r, i, tmp;
2559
2560         /* init the CP */
2561         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2562         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2563
2564         gfx_v9_0_cp_gfx_enable(adev, true);
2565
2566         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2567         if (r) {
2568                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2569                 return r;
2570         }
2571
2572         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2573         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2574
2575         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2576         amdgpu_ring_write(ring, 0x80000000);
2577         amdgpu_ring_write(ring, 0x80000000);
2578
2579         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2580                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2581                         if (sect->id == SECT_CONTEXT) {
2582                                 amdgpu_ring_write(ring,
2583                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2584                                                ext->reg_count));
2585                                 amdgpu_ring_write(ring,
2586                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2587                                 for (i = 0; i < ext->reg_count; i++)
2588                                         amdgpu_ring_write(ring, ext->extent[i]);
2589                         }
2590                 }
2591         }
2592
2593         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2594         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2595
2596         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2597         amdgpu_ring_write(ring, 0);
2598
2599         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2600         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2601         amdgpu_ring_write(ring, 0x8000);
2602         amdgpu_ring_write(ring, 0x8000);
2603
2604         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2605         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2606                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2607         amdgpu_ring_write(ring, tmp);
2608         amdgpu_ring_write(ring, 0);
2609
2610         amdgpu_ring_commit(ring);
2611
2612         return 0;
2613 }
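/*
 * The submission above replays the golden context: everything between the
 * PREAMBLE_BEGIN/END_CLEAR_STATE markers streams the clearstate_gfx9.h
 * tables as SET_CONTEXT_REG packets, and the CLEAR_STATE packet then lets
 * the CP capture that as the default context. The "+ 4 + 3" in the ring
 * allocation appears to account for the trailing SET_BASE (4 dwords) and
 * SET_UCONFIG_REG (3 dwords) packets, which get_csb_size() does not cover.
 */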
2614
2615 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2616 {
2617         struct amdgpu_ring *ring;
2618         u32 tmp;
2619         u32 rb_bufsz;
2620         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2621
2622         /* Set the write pointer delay */
2623         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2624
2625         /* set the RB to use vmid 0 */
2626         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2627
2628         /* Set ring buffer size */
2629         ring = &adev->gfx.gfx_ring[0];
2630         rb_bufsz = order_base_2(ring->ring_size / 8);
2631         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2632         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2633 #ifdef __BIG_ENDIAN
2634         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2635 #endif
2636         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2637
2638         /* Initialize the ring buffer's write pointers */
2639         ring->wptr = 0;
2640         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2641         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2642
2643         /* set the wb address whether it's enabled or not */
2644         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2645         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2646         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2647
2648         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2649         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2650         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2651
2652         mdelay(1);
2653         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2654
2655         rb_addr = ring->gpu_addr >> 8;
2656         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2657         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2658
2659         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2660         if (ring->use_doorbell) {
2661                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2662                                     DOORBELL_OFFSET, ring->doorbell_index);
2663                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2664                                     DOORBELL_EN, 1);
2665         } else {
2666                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2667         }
2668         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2669
2670         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2671                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2672         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2673
2674         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2675                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2676
2678         /* start the ring */
2679         gfx_v9_0_cp_gfx_start(adev);
2680         ring->sched.ready = true;
2681
2682         return 0;
2683 }
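/*
 * Note: CP_RB0_CNTL is deliberately written twice above (before and after
 * the rptr/wptr write-back addresses are programmed, with a 1 ms settle
 * time in between), the ring base is specified in 256-byte units, and the
 * doorbell range registers delimit the doorbell window the CP associates
 * with the gfx ring.
 */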
2684
2685 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2686 {
2687         int i;
2688
2689         if (enable) {
2690                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2691         } else {
2692                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2693                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2694                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2695                         adev->gfx.compute_ring[i].sched.ready = false;
2696                 adev->gfx.kiq.ring.sched.ready = false;
2697         }
2698         udelay(50);
2699 }
2700
2701 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2702 {
2703         const struct gfx_firmware_header_v1_0 *mec_hdr;
2704         const __le32 *fw_data;
2705         unsigned i;
2706         u32 tmp;
2707
2708         if (!adev->gfx.mec_fw)
2709                 return -EINVAL;
2710
2711         gfx_v9_0_cp_compute_enable(adev, false);
2712
2713         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2714         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2715
2716         fw_data = (const __le32 *)
2717                 (adev->gfx.mec_fw->data +
2718                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2719         tmp = 0;
2720         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2721         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2722         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2723
2724         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2725                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2726         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2727                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2728
2729         /* MEC1 */
2730         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2731                          mec_hdr->jt_offset);
2732         for (i = 0; i < mec_hdr->jt_size; i++)
2733                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2734                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2735
2736         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2737                         adev->gfx.mec_fw_version);
2738         /* TODO: loading MEC2 firmware is only necessary if MEC2 is to run different microcode than MEC1 */
2739
2740         return 0;
2741 }
2742
2743 /* KIQ functions */
2744 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2745 {
2746         uint32_t tmp;
2747         struct amdgpu_device *adev = ring->adev;
2748
2749         /* tell the RLC which queue is the KIQ */
2750         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2751         tmp &= 0xffffff00;
2752         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2753         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2754         tmp |= 0x80;
2755         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2756 }
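/*
 * RLC_CP_SCHEDULERS is written twice on purpose: the first write programs
 * the KIQ's me/pipe/queue into the low byte, and the second one sets bit 7
 * (the 0x80) on top of it, which appears to be the valid/active flag the
 * RLC checks for the KIQ queue.
 */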
2757
2758 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2759 {
2760         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2761         uint64_t queue_mask = 0;
2762         int r, i;
2763
2764         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2765                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2766                         continue;
2767
2768                 /* This situation may be hit in the future if a new HW
2769                  * generation exposes more than 64 queues. If so, the
2770                  * definition of queue_mask needs updating */
2771                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2772                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2773                         break;
2774                 }
2775
2776                 queue_mask |= (1ull << i);
2777         }
2778
2779         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2780         if (r) {
2781                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2782                 return r;
2783         }
2784
2785         /* set resources */
2786         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2787         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2788                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2789         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2790         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2791         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2792         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2793         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2794         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2795         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2796                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2797                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2798                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2799
2800                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2802                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2803                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2804                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2805                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2806                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2807                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2808                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2809                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2810                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2811                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2812                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2813                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2814                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2815                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2816                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2817         }
2818
2819         r = amdgpu_ring_test_helper(kiq_ring);
2820         if (r)
2821                 DRM_ERROR("KCQ enable failed\n");
2822
2823         return r;
2824 }
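/*
 * The ring allocation above matches the packets emitted: SET_RESOURCES is
 * 8 dwords and each MAP_QUEUES is 7 dwords (a PACKET3 count of N means
 * N + 2 dwords in total). MAP_QUEUES is what hands each compute queue's
 * MQD to the KIQ for scheduling; note that the ME field maps ring->me
 * 1/2 onto 0/1 because the packet counts MEC engines from zero.
 */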
2825
2826 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2827 {
2828         struct amdgpu_device *adev = ring->adev;
2829         struct v9_mqd *mqd = ring->mqd_ptr;
2830         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2831         uint32_t tmp;
2832
2833         mqd->header = 0xC0310800;
2834         mqd->compute_pipelinestat_enable = 0x00000001;
2835         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2836         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2837         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2838         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2839         mqd->compute_misc_reserved = 0x00000003;
2840
2841         mqd->dynamic_cu_mask_addr_lo =
2842                 lower_32_bits(ring->mqd_gpu_addr
2843                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2844         mqd->dynamic_cu_mask_addr_hi =
2845                 upper_32_bits(ring->mqd_gpu_addr
2846                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2847
2848         eop_base_addr = ring->eop_gpu_addr >> 8;
2849         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2850         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2851
2852         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2853         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2854         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2855                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2856
2857         mqd->cp_hqd_eop_control = tmp;
2858
2859         /* enable doorbell? */
2860         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2861
2862         if (ring->use_doorbell) {
2863                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2864                                     DOORBELL_OFFSET, ring->doorbell_index);
2865                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2866                                     DOORBELL_EN, 1);
2867                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2868                                     DOORBELL_SOURCE, 0);
2869                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2870                                     DOORBELL_HIT, 0);
2871         } else {
2872                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2873                                          DOORBELL_EN, 0);
2874         }
2875
2876         mqd->cp_hqd_pq_doorbell_control = tmp;
2877
2878         /* disable the queue if it's active */
2879         ring->wptr = 0;
2880         mqd->cp_hqd_dequeue_request = 0;
2881         mqd->cp_hqd_pq_rptr = 0;
2882         mqd->cp_hqd_pq_wptr_lo = 0;
2883         mqd->cp_hqd_pq_wptr_hi = 0;
2884
2885         /* set the pointer to the MQD */
2886         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2887         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2888
2889         /* set MQD vmid to 0 */
2890         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2891         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2892         mqd->cp_mqd_control = tmp;
2893
2894         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2895         hqd_gpu_addr = ring->gpu_addr >> 8;
2896         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2897         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2898
2899         /* set up the HQD, this is similar to CP_RB0_CNTL */
2900         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2901         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2902                             (order_base_2(ring->ring_size / 4) - 1));
2903         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2904                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2905 #ifdef __BIG_ENDIAN
2906         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2907 #endif
2908         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2909         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2910         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2911         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2912         mqd->cp_hqd_pq_control = tmp;
2913
2914         /* set the wb address whether it's enabled or not */
2915         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2916         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2917         mqd->cp_hqd_pq_rptr_report_addr_hi =
2918                 upper_32_bits(wb_gpu_addr) & 0xffff;
2919
2920         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2921         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2922         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2923         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2924
2925         tmp = 0;
2926         /* enable the doorbell if requested */
2927         if (ring->use_doorbell) {
2928                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2929                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2930                                 DOORBELL_OFFSET, ring->doorbell_index);
2931
2932                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2933                                          DOORBELL_EN, 1);
2934                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2935                                          DOORBELL_SOURCE, 0);
2936                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2937                                          DOORBELL_HIT, 0);
2938         }
2939
2940         mqd->cp_hqd_pq_doorbell_control = tmp;
2941
2942         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2943         ring->wptr = 0;
2944         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2945
2946         /* set the vmid for the queue */
2947         mqd->cp_hqd_vmid = 0;
2948
2949         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2950         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2951         mqd->cp_hqd_persistent_state = tmp;
2952
2953         /* set MIN_IB_AVAIL_SIZE */
2954         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2955         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2956         mqd->cp_hqd_ib_control = tmp;
2957
2958         /* activate the queue */
2959         mqd->cp_hqd_active = 1;
2960
2961         return 0;
2962 }
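/*
 * Everything above only fills in the MQD (memory queue descriptor) image
 * behind ring->mqd_ptr; no hardware is touched here. For the KIQ the
 * values are committed to the HQD registers by gfx_v9_0_kiq_init_register(),
 * while for regular compute queues the KIQ loads them itself when the
 * queue is mapped via MAP_QUEUES.
 */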
2963
2964 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2965 {
2966         struct amdgpu_device *adev = ring->adev;
2967         struct v9_mqd *mqd = ring->mqd_ptr;
2968         int j;
2969
2970         /* disable wptr polling */
2971         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2972
2973         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2974                mqd->cp_hqd_eop_base_addr_lo);
2975         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2976                mqd->cp_hqd_eop_base_addr_hi);
2977
2978         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2979         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2980                mqd->cp_hqd_eop_control);
2981
2982         /* enable doorbell? */
2983         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2984                mqd->cp_hqd_pq_doorbell_control);
2985
2986         /* disable the queue if it's active */
2987         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2988                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2989                 for (j = 0; j < adev->usec_timeout; j++) {
2990                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2991                                 break;
2992                         udelay(1);
2993                 }
2994                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2995                        mqd->cp_hqd_dequeue_request);
2996                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
2997                        mqd->cp_hqd_pq_rptr);
2998                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2999                        mqd->cp_hqd_pq_wptr_lo);
3000                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3001                        mqd->cp_hqd_pq_wptr_hi);
3002         }
3003
3004         /* set the pointer to the MQD */
3005         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3006                mqd->cp_mqd_base_addr_lo);
3007         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3008                mqd->cp_mqd_base_addr_hi);
3009
3010         /* set MQD vmid to 0 */
3011         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3012                mqd->cp_mqd_control);
3013
3014         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3015         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3016                mqd->cp_hqd_pq_base_lo);
3017         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3018                mqd->cp_hqd_pq_base_hi);
3019
3020         /* set up the HQD, this is similar to CP_RB0_CNTL */
3021         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3022                mqd->cp_hqd_pq_control);
3023
3024         /* set the wb address whether it's enabled or not */
3025         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3026                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3027         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3028                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3029
3030         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3031         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3032                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3033         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3034                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3035
3036         /* enable the doorbell if requested */
3037         if (ring->use_doorbell) {
3038                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3039                                         (adev->doorbell_index.kiq * 2) << 2);
3040                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3041                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3042         }
3043
3044         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3045                mqd->cp_hqd_pq_doorbell_control);
3046
3047         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3048         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3049                mqd->cp_hqd_pq_wptr_lo);
3050         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3051                mqd->cp_hqd_pq_wptr_hi);
3052
3053         /* set the vmid for the queue */
3054         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3055
3056         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3057                mqd->cp_hqd_persistent_state);
3058
3059         /* activate the queue */
3060         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3061                mqd->cp_hqd_active);
3062
3063         if (ring->use_doorbell)
3064                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3065
3066         return 0;
3067 }
3068
3069 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3070 {
3071         struct amdgpu_device *adev = ring->adev;
3072         int j;
3073
3074         /* disable the queue if it's active */
3075         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3076
3077                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3078
3079                 for (j = 0; j < adev->usec_timeout; j++) {
3080                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3081                                 break;
3082                         udelay(1);
3083                 }
3084
3085                 if (j == adev->usec_timeout) {
3086                         DRM_DEBUG("KIQ dequeue request failed.\n");
3087
3088                         /* Manual disable if dequeue request times out */
3089                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3090                 }
3091
3092                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3093                       0);
3094         }
3095
3096         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3097         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3098         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3099         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3100         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3101         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3102         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3103         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3104
3105         return 0;
3106 }
3107
3108 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3109 {
3110         struct amdgpu_device *adev = ring->adev;
3111         struct v9_mqd *mqd = ring->mqd_ptr;
3112         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3113
3114         gfx_v9_0_kiq_setting(ring);
3115
3116         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3117                 /* reset MQD to a clean status */
3118                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3119                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3120
3121                 /* reset ring buffer */
3122                 ring->wptr = 0;
3123                 amdgpu_ring_clear_ring(ring);
3124
3125                 mutex_lock(&adev->srbm_mutex);
3126                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3127                 gfx_v9_0_kiq_init_register(ring);
3128                 soc15_grbm_select(adev, 0, 0, 0, 0);
3129                 mutex_unlock(&adev->srbm_mutex);
3130         } else {
3131                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3132                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3133                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3134                 mutex_lock(&adev->srbm_mutex);
3135                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3136                 gfx_v9_0_mqd_init(ring);
3137                 gfx_v9_0_kiq_init_register(ring);
3138                 soc15_grbm_select(adev, 0, 0, 0, 0);
3139                 mutex_unlock(&adev->srbm_mutex);
3140
3141                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3142                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3143         }
3144
3145         return 0;
3146 }
3147
3148 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3149 {
3150         struct amdgpu_device *adev = ring->adev;
3151         struct v9_mqd *mqd = ring->mqd_ptr;
3152         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3153
3154         if (!adev->in_gpu_reset && !adev->in_suspend) {
3155                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3156                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3157                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3158                 mutex_lock(&adev->srbm_mutex);
3159                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3160                 gfx_v9_0_mqd_init(ring);
3161                 soc15_grbm_select(adev, 0, 0, 0, 0);
3162                 mutex_unlock(&adev->srbm_mutex);
3163
3164                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3165                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3166         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3167                 /* reset MQD to a clean status */
3168                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3169                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3170
3171                 /* reset ring buffer */
3172                 ring->wptr = 0;
3173                 amdgpu_ring_clear_ring(ring);
3174         } else {
3175                 amdgpu_ring_clear_ring(ring);
3176         }
3177
3178         return 0;
3179 }
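/*
 * Contrast with gfx_v9_0_kiq_init_queue() above: user compute queues never
 * program the HQD registers directly from here. They only prepare (or, on
 * reset, restore) the MQD, and the actual activation happens later through
 * the KIQ in gfx_v9_0_kiq_kcq_enable().
 */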
3180
3181 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3182 {
3183         struct amdgpu_ring *ring;
3184         int r;
3185
3186         ring = &adev->gfx.kiq.ring;
3187
3188         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3189         if (unlikely(r != 0))
3190                 return r;
3191
3192         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3193         if (unlikely(r != 0)) {
3194                 amdgpu_bo_unreserve(ring->mqd_obj);
                return r;
        }
3195
3196         gfx_v9_0_kiq_init_queue(ring);
3197         amdgpu_bo_kunmap(ring->mqd_obj);
3198         ring->mqd_ptr = NULL;
3199         amdgpu_bo_unreserve(ring->mqd_obj);
3200         ring->sched.ready = true;
3201         return 0;
3202 }
3203
3204 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3205 {
3206         struct amdgpu_ring *ring = NULL;
3207         int r = 0, i;
3208
3209         gfx_v9_0_cp_compute_enable(adev, true);
3210
3211         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3212                 ring = &adev->gfx.compute_ring[i];
3213
3214                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3215                 if (unlikely(r != 0))
3216                         goto done;
3217                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3218                 if (!r) {
3219                         r = gfx_v9_0_kcq_init_queue(ring);
3220                         amdgpu_bo_kunmap(ring->mqd_obj);
3221                         ring->mqd_ptr = NULL;
3222                 }
3223                 amdgpu_bo_unreserve(ring->mqd_obj);
3224                 if (r)
3225                         goto done;
3226         }
3227
3228         r = gfx_v9_0_kiq_kcq_enable(adev);
3229 done:
3230         return r;
3231 }
3232
3233 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3234 {
3235         int r, i;
3236         struct amdgpu_ring *ring;
3237
3238         if (!(adev->flags & AMD_IS_APU))
3239                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3240
3241         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3242                 /* legacy firmware loading */
3243                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3244                 if (r)
3245                         return r;
3246
3247                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3248                 if (r)
3249                         return r;
3250         }
3251
3252         r = gfx_v9_0_kiq_resume(adev);
3253         if (r)
3254                 return r;
3255
3256         r = gfx_v9_0_cp_gfx_resume(adev);
3257         if (r)
3258                 return r;
3259
3260         r = gfx_v9_0_kcq_resume(adev);
3261         if (r)
3262                 return r;
3263
3264         ring = &adev->gfx.gfx_ring[0];
3265         r = amdgpu_ring_test_helper(ring);
3266         if (r)
3267                 return r;
3268
3269         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3270                 ring = &adev->gfx.compute_ring[i];
3271                 amdgpu_ring_test_helper(ring);
3272         }
3273
3274         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3275
3276         return 0;
3277 }
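/*
 * The ordering above matters: the KIQ has to come up first because the
 * compute queues are mapped through it in gfx_v9_0_kcq_resume(), while the
 * gfx ring is programmed directly via registers and has no such dependency.
 */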
3278
3279 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3280 {
3281         gfx_v9_0_cp_gfx_enable(adev, enable);
3282         gfx_v9_0_cp_compute_enable(adev, enable);
3283 }
3284
3285 static int gfx_v9_0_hw_init(void *handle)
3286 {
3287         int r;
3288         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3289
3290         gfx_v9_0_init_golden_registers(adev);
3291
3292         gfx_v9_0_constants_init(adev);
3293
3294         r = gfx_v9_0_csb_vram_pin(adev);
3295         if (r)
3296                 return r;
3297
3298         r = adev->gfx.rlc.funcs->resume(adev);
3299         if (r)
3300                 return r;
3301
3302         r = gfx_v9_0_cp_resume(adev);
3303         if (r)
3304                 return r;
3305
3306         r = gfx_v9_0_ngg_en(adev);
3307         if (r)
3308                 return r;
3309
3310         return r;
3311 }
3312
3313 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3314 {
3315         int r, i;
3316         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3317
3318         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3319         if (r) {
3320                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                return r;
        }
3321
3322         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3323                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3324
3325                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3326                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3327                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3328                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3329                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3330                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3331                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3332                 amdgpu_ring_write(kiq_ring, 0);
3333                 amdgpu_ring_write(kiq_ring, 0);
3334                 amdgpu_ring_write(kiq_ring, 0);
3335         }
3336         r = amdgpu_ring_test_helper(kiq_ring);
3337         if (r)
3338                 DRM_ERROR("KCQ disable failed\n");
3339
3340         return r;
3341 }
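/*
 * Each UNMAP_QUEUES packet above is 6 dwords, matching the ring allocation,
 * and uses ACTION(1) (RESET_QUEUES) addressed by doorbell offset. The
 * trailing ring test doubles as a fence that the KIQ has consumed the
 * unmap requests before the CP is halted in gfx_v9_0_hw_fini().
 */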
3342
3343 static int gfx_v9_0_hw_fini(void *handle)
3344 {
3345         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3346
3347         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3348         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3349         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3350
3351         /* disable the KCQs so the CPC stops touching memory that is about to become invalid */
3352         gfx_v9_0_kcq_disable(adev);
3353
3354         if (amdgpu_sriov_vf(adev)) {
3355                 gfx_v9_0_cp_gfx_enable(adev, false);
3356                 /* For SR-IOV, wptr polling must be disabled once the hw is
3357                  * finished; otherwise the CPC engine may keep fetching a WB
3358                  * address that is no longer valid after sw teardown, which
3359                  * triggers DMAR read errors on the hypervisor side.
3360                  */
3361                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3362                 return 0;
3363         }
3364
3365         /* Use the deinitialize sequence from CAIL when unbinding the device
3366          * from the driver; otherwise the KIQ hangs when binding back.
3367          */
3368         if (!adev->in_gpu_reset && !adev->in_suspend) {
3369                 mutex_lock(&adev->srbm_mutex);
3370                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3371                                 adev->gfx.kiq.ring.pipe,
3372                                 adev->gfx.kiq.ring.queue, 0);
3373                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3374                 soc15_grbm_select(adev, 0, 0, 0, 0);
3375                 mutex_unlock(&adev->srbm_mutex);
3376         }
3377
3378         gfx_v9_0_cp_enable(adev, false);
3379         adev->gfx.rlc.funcs->stop(adev);
3380
3381         gfx_v9_0_csb_vram_unpin(adev);
3382
3383         return 0;
3384 }
3385
3386 static int gfx_v9_0_suspend(void *handle)
3387 {
3388         return gfx_v9_0_hw_fini(handle);
3389 }
3390
3391 static int gfx_v9_0_resume(void *handle)
3392 {
3393         return gfx_v9_0_hw_init(handle);
3394 }
3395
3396 static bool gfx_v9_0_is_idle(void *handle)
3397 {
3398         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3399
3400         return !REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3401                               GRBM_STATUS, GUI_ACTIVE);
3405 }
3406
3407 static int gfx_v9_0_wait_for_idle(void *handle)
3408 {
3409         unsigned i;
3410         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3411
3412         for (i = 0; i < adev->usec_timeout; i++) {
3413                 if (gfx_v9_0_is_idle(handle))
3414                         return 0;
3415                 udelay(1);
3416         }
3417         return -ETIMEDOUT;
3418 }
3419
3420 static int gfx_v9_0_soft_reset(void *handle)
3421 {
3422         u32 grbm_soft_reset = 0;
3423         u32 tmp;
3424         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3425
3426         /* GRBM_STATUS */
3427         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3428         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3429                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3430                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3431                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3432                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3433                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3434                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3435                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3436                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3437                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3438         }
3439
3440         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3441                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3442                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3443         }
3444
3445         /* GRBM_STATUS2 */
3446         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3447         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3448                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3449                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3450
3452         if (grbm_soft_reset) {
3453                 /* stop the rlc */
3454                 adev->gfx.rlc.funcs->stop(adev);
3455
3456                 /* Disable GFX parsing/prefetching */
3457                 gfx_v9_0_cp_gfx_enable(adev, false);
3458
3459                 /* Disable MEC parsing/prefetching */
3460                 gfx_v9_0_cp_compute_enable(adev, false);
3461
3463                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3464                 tmp |= grbm_soft_reset;
3465                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3466                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3467                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3468
3469                 udelay(50);
3470
3471                 tmp &= ~grbm_soft_reset;
3472                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3473                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3475
3476                 /* Wait a little for things to settle down */
3477                 udelay(50);
3478         }
3479         return 0;
3480 }
3481
3482 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3483 {
3484         uint64_t clock;
3485
3486         mutex_lock(&adev->gfx.gpu_clock_mutex);
3487         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3488         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3489                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3490         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3491         return clock;
3492 }
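/*
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter
 * into the LSB/MSB pair, so the two 32-bit reads above are coherent with
 * each other; the mutex serializes concurrent users of the capture
 * registers.
 */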
3493
3494 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3495                                           uint32_t vmid,
3496                                           uint32_t gds_base, uint32_t gds_size,
3497                                           uint32_t gws_base, uint32_t gws_size,
3498                                           uint32_t oa_base, uint32_t oa_size)
3499 {
3500         struct amdgpu_device *adev = ring->adev;
3501
3502         /* GDS Base */
3503         gfx_v9_0_write_data_to_reg(ring, 0, false,
3504                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3505                                    gds_base);
3506
3507         /* GDS Size */
3508         gfx_v9_0_write_data_to_reg(ring, 0, false,
3509                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3510                                    gds_size);
3511
3512         /* GWS */
3513         gfx_v9_0_write_data_to_reg(ring, 0, false,
3514                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3515                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3516
3517         /* OA */
3518         gfx_v9_0_write_data_to_reg(ring, 0, false,
3519                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3520                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3521 }
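/*
 * The OA expression above builds a contiguous mask of oa_size bits
 * starting at bit oa_base: e.g. oa_base = 4, oa_size = 4 gives
 * (1 << 8) - (1 << 4) = 0xf0. The GDS base/size registers are spaced two
 * dwords apart per VMID, hence the "+ 2 * vmid" addressing.
 */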
3522
3523 static const u32 vgpr_init_compute_shader[] =
3524 {
3525         0xb07c0000, 0xbe8000ff,
3526         0x000000f8, 0xbf110800,
3527         0x7e000280, 0x7e020280,
3528         0x7e040280, 0x7e060280,
3529         0x7e080280, 0x7e0a0280,
3530         0x7e0c0280, 0x7e0e0280,
3531         0x80808800, 0xbe803200,
3532         0xbf84fff5, 0xbf9c0000,
3533         0xd28c0001, 0x0001007f,
3534         0xd28d0001, 0x0002027e,
3535         0x10020288, 0xb8810904,
3536         0xb7814000, 0xd1196a01,
3537         0x00000301, 0xbe800087,
3538         0xbefc00c1, 0xd89c4000,
3539         0x00020201, 0xd89cc080,
3540         0x00040401, 0x320202ff,
3541         0x00000800, 0x80808100,
3542         0xbf84fff8, 0x7e020280,
3543         0xbf810000, 0x00000000,
3544 };
3545
3546 static const u32 sgpr_init_compute_shader[] =
3547 {
3548         0xb07c0000, 0xbe8000ff,
3549         0x0000005f, 0xbee50080,
3550         0xbe812c65, 0xbe822c65,
3551         0xbe832c65, 0xbe842c65,
3552         0xbe852c65, 0xb77c0005,
3553         0x80808500, 0xbf84fff8,
3554         0xbe800080, 0xbf810000,
3555 };
3556
3557 static const struct soc15_reg_entry vgpr_init_regs[] = {
3558    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3559    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3560    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3561    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3562    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3563    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3564    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3565    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3566    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3567    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3568 };
3569
3570 static const struct soc15_reg_entry sgpr_init_regs[] = {
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3572    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3573    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3581 };
3582
3583 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3584    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3585    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3586    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3587    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3588    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3589    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3590    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3591    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3592    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3593    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3594    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3595    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3596    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3597    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3598    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3599    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3600    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3601    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3602    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3603    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3604    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3605    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3606    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3607    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3608    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3611    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3612    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3613    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3614    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3615    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3616 };
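/*
 * For each entry above, the trailing initializers are the reg_value
 * (unused here, hence 0) and the SE/instance counts; the EDC/RAS code
 * presumably iterates those with GRBM SE/instance selection when clearing
 * and harvesting the error counters.
 */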
3617
3618 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3619 {
3620         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3621         int i, r;
3622
3623         r = amdgpu_ring_alloc(ring, 7);
3624         if (r) {
3625                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3626                         ring->name, r);
3627                 return r;
3628         }
3629
3630         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3631         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3632
3633         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3634         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3635                                 PACKET3_DMA_DATA_DST_SEL(1) |
3636                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3637                                 PACKET3_DMA_DATA_ENGINE(0)));
3638         amdgpu_ring_write(ring, 0);
3639         amdgpu_ring_write(ring, 0);
3640         amdgpu_ring_write(ring, 0);
3641         amdgpu_ring_write(ring, 0);
3642         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3643                                 adev->gds.gds_size);
3644
3645         amdgpu_ring_commit(ring);
3646
3647         for (i = 0; i < adev->usec_timeout; i++) {
3648                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3649                         break;
3650                 udelay(1);
3651         }
3652
3653         if (i >= adev->usec_timeout)
3654                 r = -ETIMEDOUT;
3655
3656         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3657
3658         return r;
3659 }
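/*
 * The DMA_DATA packet above appears to fill all of GDS with an immediate
 * value (DST_SEL(1) selecting GDS, SRC_SEL(2) selecting embedded data,
 * zero here) so that the GDS ECC state starts out initialized; RAW_WAIT
 * plus the rptr polling loop ensure the fill has drained before
 * GDS_VMID0_SIZE is restored to 0.
 */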
3660
3661 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3662 {
3663         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3664         struct amdgpu_ib ib;
3665         struct dma_fence *f = NULL;
3666         int r, i, j, k;
3667         unsigned total_size, vgpr_offset, sgpr_offset;
3668         u64 gpu_addr;
3669
3670         /* only supported when RAS is enabled */
3671         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3672                 return 0;
3673
3674         /* bail if the compute ring is not ready */
3675         if (!ring->sched.ready)
3676                 return 0;
3677
3678         total_size =
3679                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3680         total_size +=
3681                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3682         total_size = ALIGN(total_size, 256);
3683         vgpr_offset = total_size;
3684         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3685         sgpr_offset = total_size;
3686         total_size += sizeof(sgpr_init_compute_shader);
3687
3688         /* allocate an indirect buffer to put the commands in */
3689         memset(&ib, 0, sizeof(ib));
3690         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3691         if (r) {
3692                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3693                 return r;
3694         }
3695
3696         /* load the compute shaders */
3697         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3698                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3699
3700         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3701                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3702
3703         /* init the ib length to 0 */
3704         ib.length_dw = 0;
3705
3706         /* VGPR */
3707         /* write the register state for the compute dispatch */
3708         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3709                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3710                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3711                                                                 - PACKET3_SET_SH_REG_START;
3712                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3713         }
3714         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3715         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3716         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3717         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3718                                                         - PACKET3_SET_SH_REG_START;
3719         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3720         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3721
3722         /* write dispatch packet */
3723         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3724         ib.ptr[ib.length_dw++] = 128; /* x */
3725         ib.ptr[ib.length_dw++] = 1; /* y */
3726         ib.ptr[ib.length_dw++] = 1; /* z */
3727         ib.ptr[ib.length_dw++] =
3728                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3729
3730         /* write CS partial flush packet */
3731         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3732         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3733
3734         /* SGPR */
3735         /* write the register state for the compute dispatch */
3736         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3737                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3738                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3739                                                                 - PACKET3_SET_SH_REG_START;
3740                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3741         }
3742         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3743         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3744         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3745         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3746                                                         - PACKET3_SET_SH_REG_START;
3747         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3748         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3749
3750         /* write dispatch packet */
3751         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3752         ib.ptr[ib.length_dw++] = 128; /* x */
3753         ib.ptr[ib.length_dw++] = 1; /* y */
3754         ib.ptr[ib.length_dw++] = 1; /* z */
3755         ib.ptr[ib.length_dw++] =
3756                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3757
3758         /* write CS partial flush packet */
3759         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3760         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3761
3762         /* schedule the IB on the ring */
3763         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3764         if (r) {
3765                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3766                 goto fail;
3767         }
3768
3769         /* wait for the GPU to finish processing the IB */
3770         r = dma_fence_wait(f, false);
3771         if (r) {
3772                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3773                 goto fail;
3774         }
3775
3776         /* read back registers to clear the counters */
3777         mutex_lock(&adev->grbm_idx_mutex);
3778         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3779                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3780                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3781                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3782                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3783                         }
3784                 }
3785         }
3786         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3787         mutex_unlock(&adev->grbm_idx_mutex);
3788
3789 fail:
3790         amdgpu_ib_free(adev, &ib, NULL);
3791         dma_fence_put(f);
3792
3793         return r;
3794 }
3795
3796 static int gfx_v9_0_early_init(void *handle)
3797 {
3798         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3799
3800         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3801         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3802         gfx_v9_0_set_ring_funcs(adev);
3803         gfx_v9_0_set_irq_funcs(adev);
3804         gfx_v9_0_set_gds_init(adev);
3805         gfx_v9_0_set_rlc_funcs(adev);
3806
3807         return 0;
3808 }
3809
3810 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3811                 struct amdgpu_iv_entry *entry);
3812
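/*
 * Late-init RAS wiring for GFX: run the EDC workarounds, enable the
 * RAS feature (falling back to a boot-time GPU reset on -EAGAIN),
 * then hook up the error-count sysfs node, the error-inject debugfs
 * node and the CP ECC interrupt.  On resume the previously allocated
 * ras_if is reused and only the enable and irq steps are repeated.
 */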
3813 static int gfx_v9_0_ecc_late_init(void *handle)
3814 {
3815         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3816         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3817         struct ras_ih_if ih_info = {
3818                 .cb = gfx_v9_0_process_ras_data_cb,
3819         };
3820         struct ras_fs_if fs_info = {
3821                 .sysfs_name = "gfx_err_count",
3822                 .debugfs_name = "gfx_err_inject",
3823         };
3824         struct ras_common_if ras_block = {
3825                 .block = AMDGPU_RAS_BLOCK__GFX,
3826                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3827                 .sub_block_index = 0,
3828                 .name = "gfx",
3829         };
3830         int r;
3831
3832         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3833                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3834                 return 0;
3835         }
3836
3837         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3838         if (r)
3839                 return r;
3840
3841         /* requires IBs so do in late init after IB pool is initialized */
3842         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3843         if (r)
3844                 return r;
3845
3846         /* handle resume path. */
3847         if (*ras_if) {
3848                 /* resend the RAS TA enable cmd during resume and
3849                  * be prepared to handle failure.
3850                  */
3851                 ih_info.head = **ras_if;
3852                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3853                 if (r) {
3854                         if (r == -EAGAIN) {
3855                                 /* request a GPU reset; this will run again after reset. */
3856                                 amdgpu_ras_request_reset_on_boot(adev,
3857                                                 AMDGPU_RAS_BLOCK__GFX);
3858                                 return 0;
3859                         }
3860                         /* failed to enable RAS, clean up everything. */
3861                         goto irq;
3862                 }
3863                 /* enabled successfully, continue. */
3864                 goto resume;
3865         }
3866
3867         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3868         if (!*ras_if)
3869                 return -ENOMEM;
3870
3871         **ras_if = ras_block;
3872
3873         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3874         if (r) {
3875                 if (r == -EAGAIN) {
3876                         amdgpu_ras_request_reset_on_boot(adev,
3877                                         AMDGPU_RAS_BLOCK__GFX);
3878                         r = 0;
3879                 }
3880                 goto feature;
3881         }
3882
3883         ih_info.head = **ras_if;
3884         fs_info.head = **ras_if;
3885
3886         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3887         if (r)
3888                 goto interrupt;
3889
3890         amdgpu_ras_debugfs_create(adev, &fs_info);
3891
3892         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3893         if (r)
3894                 goto sysfs;
3895 resume:
3896         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3897         if (r)
3898                 goto irq;
3899
3900         return 0;
3901 irq:
3902         amdgpu_ras_sysfs_remove(adev, *ras_if);
3903 sysfs:
3904         amdgpu_ras_debugfs_remove(adev, *ras_if);
3905         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3906 interrupt:
3907         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3908 feature:
3909         kfree(*ras_if);
3910         *ras_if = NULL;
3911         return r;
3912 }
3913
3914 static int gfx_v9_0_late_init(void *handle)
3915 {
3916         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3917         int r;
3918
3919         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3920         if (r)
3921                 return r;
3922
3923         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3924         if (r)
3925                 return r;
3926
3927         r = gfx_v9_0_ecc_late_init(handle);
3928         if (r)
3929                 return r;
3930
3931         return 0;
3932 }
3933
3934 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3935 {
3936         uint32_t rlc_setting;
3937
3938         /* check whether the RLC F32 core is enabled */
3939         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3940         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3941                 return false;
3942
3943         return true;
3944 }
3945
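/*
 * RLC safe-mode handshake: write the CMD and MESSAGE fields to
 * RLC_SAFE_MODE, then poll until the RLC firmware acknowledges by
 * clearing the CMD bit.  The unset path below issues the command
 * without waiting for the ack.
 */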
3946 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3947 {
3948         uint32_t data;
3949         unsigned i;
3950
3951         data = RLC_SAFE_MODE__CMD_MASK;
3952         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3953         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3954
3955         /* wait for RLC_SAFE_MODE */
3956         for (i = 0; i < adev->usec_timeout; i++) {
3957                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3958                         break;
3959                 udelay(1);
3960         }
3961 }
3962
3963 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3964 {
3965         uint32_t data;
3966
3967         data = RLC_SAFE_MODE__CMD_MASK;
3968         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3969 }
3970
3971 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3972                                                 bool enable)
3973 {
3974         amdgpu_gfx_rlc_enter_safe_mode(adev);
3975
3976         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3977                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3978                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3979                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3980         } else {
3981                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3982                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3983         }
3984
3985         amdgpu_gfx_rlc_exit_safe_mode(adev);
3986 }
3987
3988 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3989                                                 bool enable)
3990 {
3991         /* TODO: double check if we need to perform under safe mode */
3992         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3993
3994         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3995                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3996         else
3997                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3998
3999         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4000                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4001         else
4002                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4003
4004         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4005 }
4006
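/*
 * Medium-grain clock gating (MGCG) and memory light sleep (MGLS):
 * clearing bits in RLC_CGTT_MGCG_OVERRIDE lets the hardware gate the
 * clocks; RLC and CP memory light sleep are enabled on top when the
 * corresponding cg_flags are set.  Disabling reverses both steps.
 * The whole sequence runs under RLC safe mode.
 */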
4007 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4008                                                       bool enable)
4009 {
4010         uint32_t data, def;
4011
4012         amdgpu_gfx_rlc_enter_safe_mode(adev);
4013
4014         /* It is disabled by HW by default */
4015         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4016                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4017                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4018
4019                 if (adev->asic_type != CHIP_VEGA12)
4020                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4021
4022                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4023                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4024                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4025
4026                 /* only for Vega10 & Raven1 */
4027                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4028
4029                 if (def != data)
4030                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4031
4032                 /* MGLS is a global flag to control all MGLS in GFX */
4033                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4034                         /* 2 - RLC memory Light sleep */
4035                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4036                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4037                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4038                                 if (def != data)
4039                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4040                         }
4041                         /* 3 - CP memory Light sleep */
4042                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4043                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4044                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4045                                 if (def != data)
4046                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4047                         }
4048                 }
4049         } else {
4050                 /* 1 - MGCG_OVERRIDE */
4051                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4052
4053                 if (adev->asic_type != CHIP_VEGA12)
4054                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4055
4056                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4057                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4058                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4059                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4060
4061                 if (def != data)
4062                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4063
4064                 /* 2 - disable MGLS in RLC */
4065                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4066                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4067                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4068                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4069                 }
4070
4071                 /* 3 - disable MGLS in CP */
4072                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4073                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4074                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4075                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4076                 }
4077         }
4078
4079         amdgpu_gfx_rlc_exit_safe_mode(adev);
4080 }
4081
4082 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4083                                            bool enable)
4084 {
4085         uint32_t data, def;
4086
4087         amdgpu_gfx_rlc_enter_safe_mode(adev);
4088
4089         /* Enable 3D CGCG/CGLS */
4090         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4091                 /* write cmd to clear the cgcg/cgls override */
4092                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4093                 /* unset CGCG override */
4094                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4095                 /* update CGCG and CGLS override bits */
4096                 if (def != data)
4097                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4098
4099                 /* enable the 3D CGCG FSM (0x0000363f) */
4100                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4101
4102                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4103                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4104                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4105                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4106                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4107                 if (def != data)
4108                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4109
4110                 /* set IDLE_POLL_COUNT(0x00900100) */
4111                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4112                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4113                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4114                 if (def != data)
4115                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4116         } else {
4117                 /* Disable CGCG/CGLS */
4118                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4119                 /* disable CGCG; CGLS must be disabled as well */
4120                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4121                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4122                 /* disable cgcg and cgls in FSM */
4123                 if (def != data)
4124                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4125         }
4126
4127         amdgpu_gfx_rlc_exit_safe_mode(adev);
4128 }
4129
4130 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4131                                                       bool enable)
4132 {
4133         uint32_t def, data;
4134
4135         amdgpu_gfx_rlc_enter_safe_mode(adev);
4136
4137         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4138                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4139                 /* unset CGCG override */
4140                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4141                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4142                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4143                 else
4144                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4145                 /* update CGCG and CGLS override bits */
4146                 if (def != data)
4147                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4148
4149                 /* enable the CGCG FSM (0x0000363F) */
4150                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4151
4152                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4153                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4154                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4155                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4156                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4157                 if (def != data)
4158                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4159
4160                 /* set IDLE_POLL_COUNT(0x00900100) */
4161                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4162                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4163                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4164                 if (def != data)
4165                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4166         } else {
4167                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4168                 /* reset CGCG/CGLS bits */
4169                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4170                 /* disable cgcg and cgls in FSM */
4171                 if (def != data)
4172                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4173         }
4174
4175         amdgpu_gfx_rlc_exit_safe_mode(adev);
4176 }
4177
4178 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4179                                             bool enable)
4180 {
4181         if (enable) {
4182                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4183                  * ===  MGCG + MGLS ===
4184                  */
4185                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4186                 /* ===  CGCG /CGLS for GFX 3D Only === */
4187                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4188                 /* ===  CGCG + CGLS === */
4189                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4190         } else {
4191                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4192                  * ===  CGCG + CGLS ===
4193                  */
4194                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4195                 /* ===  CGCG /CGLS for GFX 3D Only === */
4196                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4197                 /* ===  MGCG + MGLS === */
4198                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4199         }
4200         return 0;
4201 }
4202
4203 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4204         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4205         .set_safe_mode = gfx_v9_0_set_safe_mode,
4206         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4207         .init = gfx_v9_0_rlc_init,
4208         .get_csb_size = gfx_v9_0_get_csb_size,
4209         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4210         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4211         .resume = gfx_v9_0_rlc_resume,
4212         .stop = gfx_v9_0_rlc_stop,
4213         .reset = gfx_v9_0_rlc_reset,
4214         .start = gfx_v9_0_rlc_start
4215 };
4216
4217 static int gfx_v9_0_set_powergating_state(void *handle,
4218                                           enum amd_powergating_state state)
4219 {
4220         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4221         bool enable = (state == AMD_PG_STATE_GATE);
4222
4223         switch (adev->asic_type) {
4224         case CHIP_RAVEN:
4225                 if (!enable) {
4226                         amdgpu_gfx_off_ctrl(adev, false);
4227                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4228                 }
4229                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4230                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4231                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4232                 } else {
4233                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4234                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4235                 }
4236
4237                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4238                         gfx_v9_0_enable_cp_power_gating(adev, true);
4239                 else
4240                         gfx_v9_0_enable_cp_power_gating(adev, false);
4241
4242                 /* update gfx cgpg state */
4243                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4244
4245                 /* update mgcg state */
4246                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4247
4248                 if (enable)
4249                         amdgpu_gfx_off_ctrl(adev, true);
4250                 break;
4251         case CHIP_VEGA12:
4252                 if (!enable) {
4253                         amdgpu_gfx_off_ctrl(adev, false);
4254                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4255                 } else {
4256                         amdgpu_gfx_off_ctrl(adev, true);
4257                 }
4258                 break;
4259         default:
4260                 break;
4261         }
4262
4263         return 0;
4264 }
4265
4266 static int gfx_v9_0_set_clockgating_state(void *handle,
4267                                           enum amd_clockgating_state state)
4268 {
4269         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4270
4271         if (amdgpu_sriov_vf(adev))
4272                 return 0;
4273
4274         switch (adev->asic_type) {
4275         case CHIP_VEGA10:
4276         case CHIP_VEGA12:
4277         case CHIP_VEGA20:
4278         case CHIP_RAVEN:
4279                 gfx_v9_0_update_gfx_clock_gating(adev,
4280                                                  state == AMD_CG_STATE_GATE);
4281                 break;
4282         default:
4283                 break;
4284         }
4285         return 0;
4286 }
4287
4288 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4289 {
4290         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4291         int data;
4292
4293         if (amdgpu_sriov_vf(adev))
4294                 *flags = 0;
4295
4296         /* AMD_CG_SUPPORT_GFX_MGCG */
4297         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4298         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4299                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4300
4301         /* AMD_CG_SUPPORT_GFX_CGCG */
4302         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4303         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4304                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4305
4306         /* AMD_CG_SUPPORT_GFX_CGLS */
4307         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4308                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4309
4310         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4311         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4312         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4313                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4314
4315         /* AMD_CG_SUPPORT_GFX_CP_LS */
4316         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4317         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4318                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4319
4320         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4321         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4322         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4323                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4324
4325         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4326         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4327                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4328 }
4329
4330 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4331 {
4332         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4333 }
4334
4335 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4336 {
4337         struct amdgpu_device *adev = ring->adev;
4338         u64 wptr;
4339
4340         /* XXX check if swapping is necessary on BE */
4341         if (ring->use_doorbell) {
4342                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4343         } else {
4344                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4345                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4346         }
4347
4348         return wptr;
4349 }
4350
4351 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4352 {
4353         struct amdgpu_device *adev = ring->adev;
4354
4355         if (ring->use_doorbell) {
4356                 /* XXX check if swapping is necessary on BE */
4357                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4358                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4359         } else {
4360                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4361                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4362         }
4363 }
4364
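/*
 * HDP flush from the CP: poke the NBIO flush request register and
 * wait for the matching done bit.  Each CP pipe owns its own
 * ref/mask bit, so flushes from different rings do not trample each
 * other.
 */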
4365 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4366 {
4367         struct amdgpu_device *adev = ring->adev;
4368         u32 ref_and_mask, reg_mem_engine;
4369         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4370
4371         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4372                 switch (ring->me) {
4373                 case 1:
4374                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4375                         break;
4376                 case 2:
4377                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4378                         break;
4379                 default:
4380                         return;
4381                 }
4382                 reg_mem_engine = 0;
4383         } else {
4384                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4385                 reg_mem_engine = 1; /* pfp */
4386         }
4387
4388         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4389                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4390                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4391                               ref_and_mask, ref_and_mask, 0x20);
4392 }
4393
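/*
 * Emit an IB on the GFX ring.  CE IBs use the _CONST packet variant
 * so the constant engine fetches them; the control dword carries the
 * IB length and VMID, and preemptible IBs under SR-IOV additionally
 * get PRE_ENB plus a DE metadata write.
 */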
4394 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4395                                         struct amdgpu_job *job,
4396                                         struct amdgpu_ib *ib,
4397                                         uint32_t flags)
4398 {
4399         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4400         u32 header, control = 0;
4401
4402         if (ib->flags & AMDGPU_IB_FLAG_CE)
4403                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4404         else
4405                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4406
4407         control |= ib->length_dw | (vmid << 24);
4408
4409         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4410                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4411
4412                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4413                         gfx_v9_0_ring_emit_de_meta(ring);
4414         }
4415
4416         amdgpu_ring_write(ring, header);
4417         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4418         amdgpu_ring_write(ring,
4419 #ifdef __BIG_ENDIAN
4420                 (2 << 0) |
4421 #endif
4422                 lower_32_bits(ib->gpu_addr));
4423         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4424         amdgpu_ring_write(ring, control);
4425 }
4426
4427 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4428                                           struct amdgpu_job *job,
4429                                           struct amdgpu_ib *ib,
4430                                           uint32_t flags)
4431 {
4432         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4433         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4434
4435         /* Currently there is a high likelihood of a wave ID mismatch
4436          * between ME and GDS, leading to a hw deadlock, because ME generates
4437          * different wave IDs than the GDS expects. This situation happens
4438          * randomly when at least 5 compute pipes use GDS ordered append.
4439          * The wave IDs generated by ME are also wrong after suspend/resume.
4440          * Those are probably bugs somewhere else in the kernel driver.
4441          *
4442          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4443          * GDS to 0 for this ring (me/pipe).
4444          */
4445         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4446                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4447                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4448                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4449         }
4450
4451         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4452         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4453         amdgpu_ring_write(ring,
4454 #ifdef __BIG_ENDIAN
4455                                 (2 << 0) |
4456 #endif
4457                                 lower_32_bits(ib->gpu_addr));
4458         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4459         amdgpu_ring_write(ring, control);
4460 }
4461
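/*
 * Fences are emitted as RELEASE_MEM packets: an end-of-pipe event
 * that flushes or invalidates the selected caches, writes the 32- or
 * 64-bit sequence number to memory and optionally raises an
 * interrupt, all selected by the fence flags.
 */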
4462 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4463                                      u64 seq, unsigned flags)
4464 {
4465         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4466         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4467         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4468
4469         /* RELEASE_MEM - flush caches, send int */
4470         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4471         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4472                                                EOP_TC_NC_ACTION_EN) :
4473                                               (EOP_TCL1_ACTION_EN |
4474                                                EOP_TC_ACTION_EN |
4475                                                EOP_TC_WB_ACTION_EN |
4476                                                EOP_TC_MD_ACTION_EN)) |
4477                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4478                                  EVENT_INDEX(5)));
4479         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4480
4481         /*
4482          * the address must be qword aligned for a 64-bit write and dword
4483          * aligned when only the low 32 bits are sent (data high discarded)
4484          */
4485         if (write64bit)
4486                 BUG_ON(addr & 0x7);
4487         else
4488                 BUG_ON(addr & 0x3);
4489         amdgpu_ring_write(ring, lower_32_bits(addr));
4490         amdgpu_ring_write(ring, upper_32_bits(addr));
4491         amdgpu_ring_write(ring, lower_32_bits(seq));
4492         amdgpu_ring_write(ring, upper_32_bits(seq));
4493         amdgpu_ring_write(ring, 0);
4494 }
4495
4496 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4497 {
4498         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4499         uint32_t seq = ring->fence_drv.sync_seq;
4500         uint64_t addr = ring->fence_drv.gpu_addr;
4501
4502         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4503                               lower_32_bits(addr), upper_32_bits(addr),
4504                               seq, 0xffffffff, 4);
4505 }
4506
4507 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4508                                         unsigned vmid, uint64_t pd_addr)
4509 {
4510         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4511
4512         /* compute doesn't have PFP */
4513         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4514                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4515                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4516                 amdgpu_ring_write(ring, 0x0);
4517         }
4518 }
4519
4520 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4521 {
4522         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4523 }
4524
4525 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4526 {
4527         u64 wptr;
4528
4529         /* XXX check if swapping is necessary on BE */
4530         if (ring->use_doorbell)
4531                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4532         else
4533                 BUG();
4534         return wptr;
4535 }
4536
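/*
 * Compute queue priority: SPI_WCL_PIPE_PERCENT_* appears to cap the
 * share of wave-launch credits a pipe receives.  Pipes holding a
 * reservation keep the full quota; the rest are throttled to the
 * minimum value (0x1) until the reservation is dropped.
 */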
4537 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4538                                            bool acquire)
4539 {
4540         struct amdgpu_device *adev = ring->adev;
4541         int pipe_num, tmp, reg;
4542         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4543
4544         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4545
4546         /* first me only has 2 entries, GFX and HP3D */
4547         if (ring->me > 0)
4548                 pipe_num -= 2;
4549
4550         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4551         tmp = RREG32(reg);
4552         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4553         WREG32(reg, tmp);
4554 }
4555
4556 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4557                                             struct amdgpu_ring *ring,
4558                                             bool acquire)
4559 {
4560         int i, pipe;
4561         bool reserve;
4562         struct amdgpu_ring *iring;
4563
4564         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4565         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4566         if (acquire)
4567                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4568         else
4569                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4570
4571         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4572                 /* Clear all reservations - everyone reacquires all resources */
4573                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4574                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4575                                                        true);
4576
4577                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4578                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4579                                                        true);
4580         } else {
4581                 /* Lower all pipes without a current reservation */
4582                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4583                         iring = &adev->gfx.gfx_ring[i];
4584                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4585                                                            iring->me,
4586                                                            iring->pipe,
4587                                                            0);
4588                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4589                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4590                 }
4591
4592                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4593                         iring = &adev->gfx.compute_ring[i];
4594                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4595                                                            iring->me,
4596                                                            iring->pipe,
4597                                                            0);
4598                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4599                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4600                 }
4601         }
4602
4603         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4604 }
4605
4606 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4607                                       struct amdgpu_ring *ring,
4608                                       bool acquire)
4609 {
4610         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4611         uint32_t queue_priority = acquire ? 0xf : 0x0;
4612
4613         mutex_lock(&adev->srbm_mutex);
4614         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4615
4616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4618
4619         soc15_grbm_select(adev, 0, 0, 0, 0);
4620         mutex_unlock(&adev->srbm_mutex);
4621 }
4622
4623 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4624                                                enum drm_sched_priority priority)
4625 {
4626         struct amdgpu_device *adev = ring->adev;
4627         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4628
4629         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4630                 return;
4631
4632         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4633         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4634 }
4635
4636 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4637 {
4638         struct amdgpu_device *adev = ring->adev;
4639
4640         /* XXX check if swapping is necessary on BE */
4641         if (ring->use_doorbell) {
4642                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4643                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4644         } else {
4645                 BUG(); /* only DOORBELL method supported on gfx9 now */
4646         }
4647 }
4648
4649 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4650                                          u64 seq, unsigned int flags)
4651 {
4652         struct amdgpu_device *adev = ring->adev;
4653
4654         /* we only allocate 32 bits for each seq writeback address */
4655         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4656
4657         /* write fence seq to the "addr" */
4658         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4659         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4660                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4661         amdgpu_ring_write(ring, lower_32_bits(addr));
4662         amdgpu_ring_write(ring, upper_32_bits(addr));
4663         amdgpu_ring_write(ring, lower_32_bits(seq));
4664
4665         if (flags & AMDGPU_FENCE_FLAG_INT) {
4666                 /* set register to trigger INT */
4667                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4668                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4669                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4670                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4671                 amdgpu_ring_write(ring, 0);
4672                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4673         }
4674 }
4675
4676 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4677 {
4678         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4679         amdgpu_ring_write(ring, 0);
4680 }
4681
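/*
 * CE/DE metadata: when running under SR-IOV, zeroed payload structs
 * are written into the per-context save area (CSA) so the CP has a
 * valid location to save and restore pipeline state across
 * mid-command-buffer preemption.
 */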
4682 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4683 {
4684         struct v9_ce_ib_state ce_payload = {0};
4685         uint64_t csa_addr;
4686         int cnt;
4687
4688         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4689         csa_addr = amdgpu_csa_vaddr(ring->adev);
4690
4691         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4692         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4693                                  WRITE_DATA_DST_SEL(8) |
4694                                  WR_CONFIRM) |
4695                                  WRITE_DATA_CACHE_POLICY(0));
4696         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4697         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4698         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4699 }
4700
4701 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4702 {
4703         struct v9_de_ib_state de_payload = {0};
4704         uint64_t csa_addr, gds_addr;
4705         int cnt;
4706
4707         csa_addr = amdgpu_csa_vaddr(ring->adev);
4708         gds_addr = csa_addr + 4096;
4709         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4710         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4711
4712         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4713         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4714         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4715                                  WRITE_DATA_DST_SEL(8) |
4716                                  WR_CONFIRM) |
4717                                  WRITE_DATA_CACHE_POLICY(0));
4718         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4719         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4720         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4721 }
4722
4723 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4724 {
4725         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4726         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4727 }
4728
4729 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4730 {
4731         uint32_t dw2 = 0;
4732
4733         if (amdgpu_sriov_vf(ring->adev))
4734                 gfx_v9_0_ring_emit_ce_meta(ring);
4735
4736         gfx_v9_0_ring_emit_tmz(ring, true);
4737
4738         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4739         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4740                 /* set load_global_config & load_global_uconfig */
4741                 dw2 |= 0x8001;
4742                 /* set load_cs_sh_regs */
4743                 dw2 |= 0x01000000;
4744                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4745                 dw2 |= 0x10002;
4746
4747                 /* set load_ce_ram if a preamble is present */
4748                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4749                         dw2 |= 0x10000000;
4750         } else {
4751                 /* still load_ce_ram if a preamble is present for the first
4752                  * time, even though no context switch happens.
4753                  */
4754                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4755                         dw2 |= 0x10000000;
4756         }
4757
4758         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4759         amdgpu_ring_write(ring, dw2);
4760         amdgpu_ring_write(ring, 0);
4761 }
4762
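/*
 * Conditional execution: init_cond_exec emits a COND_EXEC packet
 * whose skip-count dword is a placeholder (0x55aa55aa), and
 * patch_cond_exec later rewrites that slot with the real number of
 * dwords to skip, taking ring-buffer wraparound into account.
 */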
4763 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4764 {
4765         unsigned ret;
4766         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4767         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4768         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4769         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4770         ret = ring->wptr & ring->buf_mask;
4771         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4772         return ret;
4773 }
4774
4775 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4776 {
4777         unsigned cur;
4778         BUG_ON(offset > ring->buf_mask);
4779         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4780
4781         cur = (ring->wptr & ring->buf_mask) - 1;
4782         if (likely(cur > offset))
4783                 ring->ring[offset] = cur - offset;
4784         else
4785                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4786 }
4787
4788 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4789 {
4790         struct amdgpu_device *adev = ring->adev;
4791
4792         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4793         amdgpu_ring_write(ring, 0 |     /* src: register */
4794                                 (5 << 8) |      /* dst: memory */
4795                                 (1 << 20));     /* write confirm */
4796         amdgpu_ring_write(ring, reg);
4797         amdgpu_ring_write(ring, 0);
4798         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4799                                 adev->virt.reg_val_offs * 4));
4800         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4801                                 adev->virt.reg_val_offs * 4));
4802 }
4803
4804 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4805                                     uint32_t val)
4806 {
4807         uint32_t cmd = 0;
4808
4809         switch (ring->funcs->type) {
4810         case AMDGPU_RING_TYPE_GFX:
4811                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4812                 break;
4813         case AMDGPU_RING_TYPE_KIQ:
4814                 cmd = (1 << 16); /* no inc addr */
4815                 break;
4816         default:
4817                 cmd = WR_CONFIRM;
4818                 break;
4819         }
4820         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4821         amdgpu_ring_write(ring, cmd);
4822         amdgpu_ring_write(ring, reg);
4823         amdgpu_ring_write(ring, 0);
4824         amdgpu_ring_write(ring, val);
4825 }
4826
4827 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4828                                         uint32_t val, uint32_t mask)
4829 {
4830         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4831 }
4832
4833 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4834                                                   uint32_t reg0, uint32_t reg1,
4835                                                   uint32_t ref, uint32_t mask)
4836 {
4837         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4838         struct amdgpu_device *adev = ring->adev;
4839         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4840                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4841
4842         if (fw_version_ok)
4843                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4844                                       ref, mask, 0x20);
4845         else
4846                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4847                                                            ref, mask);
4848 }
4849
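/*
 * Soft recovery: ask the SQ via SQ_CMD to kill the waves running
 * under the guilty VMID, which can unwedge a hung ring without a
 * full GPU reset.
 */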
4850 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4851 {
4852         struct amdgpu_device *adev = ring->adev;
4853         uint32_t value = 0;
4854
4855         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4856         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4857         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4858         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4859         WREG32(mmSQ_CMD, value);
4860 }
4861
4862 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4863                                                  enum amdgpu_interrupt_state state)
4864 {
4865         switch (state) {
4866         case AMDGPU_IRQ_STATE_DISABLE:
4867         case AMDGPU_IRQ_STATE_ENABLE:
4868                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4869                                TIME_STAMP_INT_ENABLE,
4870                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4871                 break;
4872         default:
4873                 break;
4874         }
4875 }
4876
4877 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4878                                                      int me, int pipe,
4879                                                      enum amdgpu_interrupt_state state)
4880 {
4881         u32 mec_int_cntl, mec_int_cntl_reg;
4882
4883         /*
4884          * amdgpu controls only the first MEC. That's why this function only
4885          * handles the setting of interrupts for this specific MEC. All other
4886          * pipes' interrupts are set by amdkfd.
4887          */
4888
4889         if (me == 1) {
4890                 switch (pipe) {
4891                 case 0:
4892                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4893                         break;
4894                 case 1:
4895                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4896                         break;
4897                 case 2:
4898                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4899                         break;
4900                 case 3:
4901                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4902                         break;
4903                 default:
4904                         DRM_DEBUG("invalid pipe %d\n", pipe);
4905                         return;
4906                 }
4907         } else {
4908                 DRM_DEBUG("invalid me %d\n", me);
4909                 return;
4910         }
4911
4912         switch (state) {
4913         case AMDGPU_IRQ_STATE_DISABLE:
4914                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4915                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4916                                              TIME_STAMP_INT_ENABLE, 0);
4917                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4918                 break;
4919         case AMDGPU_IRQ_STATE_ENABLE:
4920                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4921                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4922                                              TIME_STAMP_INT_ENABLE, 1);
4923                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4924                 break;
4925         default:
4926                 break;
4927         }
4928 }
4929
4930 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4931                                              struct amdgpu_irq_src *source,
4932                                              unsigned type,
4933                                              enum amdgpu_interrupt_state state)
4934 {
4935         switch (state) {
4936         case AMDGPU_IRQ_STATE_DISABLE:
4937         case AMDGPU_IRQ_STATE_ENABLE:
4938                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4939                                PRIV_REG_INT_ENABLE,
4940                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4941                 break;
4942         default:
4943                 break;
4944         }
4945
4946         return 0;
4947 }
4948
4949 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4950                                               struct amdgpu_irq_src *source,
4951                                               unsigned type,
4952                                               enum amdgpu_interrupt_state state)
4953 {
4954         switch (state) {
4955         case AMDGPU_IRQ_STATE_DISABLE:
4956         case AMDGPU_IRQ_STATE_ENABLE:
4957                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4958                                PRIV_INSTR_INT_ENABLE,
4959                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4960                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4961         default:
4962         }
4963
4964         return 0;
4965 }
4966
4967 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4968         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4969                         CP_ECC_ERROR_INT_ENABLE, 1)
4970
4971 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4972         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4973                         CP_ECC_ERROR_INT_ENABLE, 0)
4974
static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 1);
		DISABLE_ECC_ON_ME_PIPE(1, 2);
		DISABLE_ECC_ON_ME_PIPE(1, 3);
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 0);
		ENABLE_ECC_ON_ME_PIPE(1, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 2);
		ENABLE_ECC_ON_ME_PIPE(1, 3);
		break;
	default:
		break;
	}

	return 0;
}

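/* Route an end-of-pipe (EOP) interrupt type to the matching engine:
 * ME0 is the gfx engine; MEC1 and MEC2 are the compute micro-engines,
 * each with four pipes.
 */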
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

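/* The IV entry's ring_id encodes the source queue; it is decoded
 * below as bits [1:0] = pipe, [3:2] = me and [6:4] = queue, so e.g.
 * ring_id 0x25 means me 1, pipe 1, queue 2.
 */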
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

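/* On a CP fault, hand the ring that raised it to drm_sched_fault(),
 * which kicks the scheduler's timeout handling so the offending job
 * can be recovered.
 */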
static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

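/* An uncorrectable (UE) SRAM ECC error cannot be repaired in place:
 * flag it to KFD and schedule a full GPU reset.
 */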
static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
		struct amdgpu_iv_entry *entry)
{
	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

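/* CP ECC interrupts are not handled here directly; they are handed to
 * the RAS framework, which dispatches them to the handler registered
 * for the gfx block (adev->gfx.ras_if).
 */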
static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

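/* Hook this GFX IP block into the common init/fini, suspend/resume,
 * reset and clock/powergating paths.
 */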
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

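/* emit_frame_size is the worst-case number of ring dwords one frame
 * may emit; the core uses it when reserving ring space (e.g. via
 * amdgpu_ring_alloc()) before the emit_* callbacks run.
 */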
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

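/* Compute rings share most callbacks with the gfx ring but drop the
 * gfx-only ones (CE/DE metadata, cond_exec, switch_buffer), hence the
 * simpler frame-size accounting.
 */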
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

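/* The KIQ (kernel interface queue) is a driver-owned compute queue
 * used to program the CP and, via emit_rreg/emit_wreg, to access
 * registers through the ring itself.
 */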
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

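/* GDS (global data share) sizing: 64KB on Vega, 4KB on Raven, plus a
 * per-ASIC cap on the highest compute wave id allowed to use GDS.
 */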
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

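/* A CU is active when it is neither fused off (CC_...) nor user
 * disabled (GC_USER_...): active = ~(fused_off | user_disabled) & mask,
 * where mask has max_cu_per_sh bits set, e.g. 0x3fff for 14 CUs per SH.
 */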
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

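/* Walk every shader engine/array, apply the disable_cu module
 * parameter masks, and record both the live CU bitmap and the
 * always-on (AO) CU subset for each SE/SH.
 */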
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};