/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

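/*
 * "Golden" register settings: per-ASIC register overrides applied once at
 * init.  Each soc15_reg_golden entry names an IP block, instance, register,
 * an AND mask of the bits to touch and an OR value for those bits.
 * soc15_program_register_sequence() applies each entry as a
 * read-modify-write, roughly (sketch; see soc15.c for the exact handling):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~entry->and_mask;
 *	tmp |= entry->or_mask & entry->and_mask;
 *	WREG32(reg, tmp);
 */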
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
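
/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR_0..7 / _DATA_0..7 register pairs
 * relative to the first pair.  The RLC save/restore list programming walks
 * these tables, writing an index address to ADDR_n and the matching payload
 * to DATA_n, along the lines of (illustrative sketch only):
 *
 *	WREG32(addr_base + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], addr);
 *	WREG32(data_base + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], data);
 */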

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

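/*
 * Emit a PACKET3_WRITE_DATA that writes @val to MMIO register @reg:
 * header, control word (engine select, DST_SEL(0) = register, optional
 * write confirm), destination address low/high, then the value.  @wc
 * requests a write confirmation so later packets cannot pass the write.
 */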
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

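/*
 * Emit a PACKET3_WAIT_REG_MEM: the engine polls a register (@mem_space == 0)
 * or a memory location (@mem_space == 1) at poll interval @inv until
 * (value & @mask) compares equal to @ref (function 3 below).  @addr0/@addr1
 * carry the register offsets or the low/high halves of the memory address.
 */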
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

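/*
 * Indirect buffer test: build a tiny IB containing one WRITE_DATA packet
 * (DST_SEL(5) = memory) that stores 0xDEADBEEF to a writeback slot, schedule
 * it on the ring and wait on its fence, then check that the slot actually
 * changed from the 0xCAFEDEAD it was seeded with.
 */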
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

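/*
 * Parse the v2.1 RLC firmware header: record versions, sizes and pointers
 * for the three save/restore lists (CNTL, GPM memory, SRM memory) that are
 * later uploaded for RLC save/restore programming.
 */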
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

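/*
 * Decide whether the CP ME/PFP and MEC firmware are new enough to support
 * the WAIT_REG_MEM "write, then wait" operation used by reg_write_reg_wait;
 * older firmware has to fall back to separate write and wait packets.
 */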
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

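/*
 * GFXOFF is only trusted on Raven when the RLC firmware is new enough and
 * uses the v2.1 layout with a save/restore list; otherwise clear
 * PP_GFXOFF_MASK so the powerplay code never engages it.  Raven2
 * (rev_id >= 0x8) and Picasso (0x15d8) are left alone here.
 */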
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso on AM4 socket boards, use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.  PCO AM4 parts are identified by
	 * PCI revision 0xC8..0xCF or 0xD8..0xDF; anything else is PCO FP5.
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and we are able to get the
		 * SMU version directly, so pick the "kicker" RLC.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

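/*
 * Size, in dwords, of the clear-state buffer built by
 * gfx_v9_0_get_csb_buffer() below: 2 for the BEGIN_CLEAR_STATE preamble,
 * 3 for context control, (2 + reg_count) per SECT_CONTEXT extent, then
 * 2 for the END_CLEAR_STATE preamble and 2 for the CLEAR_STATE packet.
 */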
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

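/*
 * Build the per-SE/SH "always on" CU bitmaps used by RLC power gating:
 * the first pg_always_on_cu_num (2) CUs are reported as the PG always-on
 * set, and 4 (APU), 8 (VEGA12) or 12 (others) CUs are marked always
 * active for the RLC load balancer so they are never power gated.
 */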
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

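/*
 * Configure LBPW (load balancing per watt) for Raven: program the RLC
 * load-balancer thresholds, counters and sample parameters, then set up
 * the always-on CU mask.  gfx_v9_4_init_lbpw() below is the Vega20
 * variant with different thresholds and counter limits.
 */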
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
	 * used here as part of the RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
	 * used here as part of the RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

1104 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1105 {
1106         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1107 }
1108
1109 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1110 {
1111         return 5;
1112 }
1113
1114 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1115 {
1116         const struct cs_section_def *cs_data;
1117         int r;
1118
1119         adev->gfx.rlc.cs_data = gfx9_cs_data;
1120
1121         cs_data = adev->gfx.rlc.cs_data;
1122
1123         if (cs_data) {
1124                 /* init clear state block */
1125                 r = amdgpu_gfx_rlc_init_csb(adev);
1126                 if (r)
1127                         return r;
1128         }
1129
1130         if (adev->asic_type == CHIP_RAVEN) {
1131                 /* TODO: double check the cp_table_size for RV */
1132                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1133                 r = amdgpu_gfx_rlc_init_cpt(adev);
1134                 if (r)
1135                         return r;
1136         }
1137
1138         switch (adev->asic_type) {
1139         case CHIP_RAVEN:
1140                 gfx_v9_0_init_lbpw(adev);
1141                 break;
1142         case CHIP_VEGA20:
1143                 gfx_v9_4_init_lbpw(adev);
1144                 break;
1145         default:
1146                 break;
1147         }
1148
1149         return 0;
1150 }
1151
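/* pin the clear-state BO in VRAM and cache its GPU address for CSIB setup */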
1152 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1153 {
1154         int r;
1155
1156         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1157         if (unlikely(r != 0))
1158                 return r;
1159
1160         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1161                         AMDGPU_GEM_DOMAIN_VRAM);
1162         if (!r)
1163                 adev->gfx.rlc.clear_state_gpu_addr =
1164                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1165
1166         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1167
1168         return r;
1169 }
1170
1171 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1172 {
1173         int r;
1174
1175         if (!adev->gfx.rlc.clear_state_obj)
1176                 return;
1177
1178         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1179         if (likely(r == 0)) {
1180                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1181                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1182         }
1183 }
1184
1185 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1186 {
1187         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1189 }
1190
1191 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1192 {
1193         int r;
1194         u32 *hpd;
1195         const __le32 *fw_data;
1196         unsigned fw_size;
1197         u32 *fw;
1198         size_t mec_hpd_size;
1199
1200         const struct gfx_firmware_header_v1_0 *mec_hdr;
1201
1202         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1203
1204         /* take ownership of the relevant compute queues */
1205         amdgpu_gfx_compute_queue_acquire(adev);
1206         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1207
1208         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1209                                       AMDGPU_GEM_DOMAIN_VRAM,
1210                                       &adev->gfx.mec.hpd_eop_obj,
1211                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1212                                       (void **)&hpd);
1213         if (r) {
1214                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1215                 gfx_v9_0_mec_fini(adev);
1216                 return r;
1217         }
1218
1219         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1220
1221         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1222         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1223
1224         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1225
1226         fw_data = (const __le32 *)
1227                 (adev->gfx.mec_fw->data +
1228                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1229         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); /* in bytes, for the memcpy below */
1230
1231         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1232                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1233                                       &adev->gfx.mec.mec_fw_obj,
1234                                       &adev->gfx.mec.mec_fw_gpu_addr,
1235                                       (void **)&fw);
1236         if (r) {
1237                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1238                 gfx_v9_0_mec_fini(adev);
1239                 return r;
1240         }
1241
1242         memcpy(fw, fw_data, fw_size);
1243
1244         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1245         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1246
1247         return 0;
1248 }
1249
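/*
 * Wave debug helpers: SQ exposes per-wave state through the indirect
 * SQ_IND_INDEX/SQ_IND_DATA register pair; select wave/SIMD/address in
 * the index register, then read back through the data port.
 */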
1250 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1251 {
1252         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1253                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1254                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1255                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1256                 (SQ_IND_INDEX__FORCE_READ_MASK));
1257         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1258 }
1259
1260 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1261                            uint32_t wave, uint32_t thread,
1262                            uint32_t regno, uint32_t num, uint32_t *out)
1263 {
1264         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1265                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1266                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1267                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1268                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1269                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1270                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1271         while (num--)
1272                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1273 }
1274
1275 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1276 {
1277         /* type 1 wave data */
1278         dst[(*no_fields)++] = 1;
1279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1293 }
1294
1295 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1296                                      uint32_t wave, uint32_t start,
1297                                      uint32_t size, uint32_t *dst)
1298 {
1299         wave_read_regs(
1300                 adev, simd, wave, 0,
1301                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1302 }
1303
1304 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1305                                      uint32_t wave, uint32_t thread,
1306                                      uint32_t start, uint32_t size,
1307                                      uint32_t *dst)
1308 {
1309         wave_read_regs(
1310                 adev, simd, wave, thread,
1311                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1312 }
1313
1314 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1315                                   u32 me, u32 pipe, u32 q)
1316 {
1317         soc15_grbm_select(adev, me, pipe, q, 0);
1318 }
1319
1320 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1321         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1322         .select_se_sh = &gfx_v9_0_select_se_sh,
1323         .read_wave_data = &gfx_v9_0_read_wave_data,
1324         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1325         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1326         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1327 };
1328
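/*
 * Early GPU config: select the per-ASIC golden gb_addr_config and FIFO
 * sizes, then decode the GB_ADDR_CONFIG fields (pipes, banks, SEs, ...)
 * into adev->gfx.config.
 */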
1329 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1330 {
1331         u32 gb_addr_config;
1332         int err;
1333
1334         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1335
1336         switch (adev->asic_type) {
1337         case CHIP_VEGA10:
1338                 adev->gfx.config.max_hw_contexts = 8;
1339                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1340                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1341                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1342                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1343                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1344                 break;
1345         case CHIP_VEGA12:
1346                 adev->gfx.config.max_hw_contexts = 8;
1347                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1348                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1349                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1350                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1351                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1352                 DRM_INFO("fix gfx.config for vega12\n");
1353                 break;
1354         case CHIP_VEGA20:
1355                 adev->gfx.config.max_hw_contexts = 8;
1356                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1357                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1358                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1359                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1360                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1361                 gb_addr_config &= ~0xf3e777ff;
1362                 gb_addr_config |= 0x22014042;
1363                 /* check vbios table if gpu info is not available */
1364                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1365                 if (err)
1366                         return err;
1367                 break;
1368         case CHIP_RAVEN:
1369                 adev->gfx.config.max_hw_contexts = 8;
1370                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1371                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1372                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1373                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1374                 if (adev->rev_id >= 8)
1375                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1376                 else
1377                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1378                 break;
1379         default:
1380                 BUG();
1381                 break;
1382         }
1383
1384         adev->gfx.config.gb_addr_config = gb_addr_config;
1385
1386         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1387                         REG_GET_FIELD(
1388                                         adev->gfx.config.gb_addr_config,
1389                                         GB_ADDR_CONFIG,
1390                                         NUM_PIPES);
1391
1392         adev->gfx.config.max_tile_pipes =
1393                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1394
1395         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1396                         REG_GET_FIELD(
1397                                         adev->gfx.config.gb_addr_config,
1398                                         GB_ADDR_CONFIG,
1399                                         NUM_BANKS);
1400         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1401                         REG_GET_FIELD(
1402                                         adev->gfx.config.gb_addr_config,
1403                                         GB_ADDR_CONFIG,
1404                                         MAX_COMPRESSED_FRAGS);
1405         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1406                         REG_GET_FIELD(
1407                                         adev->gfx.config.gb_addr_config,
1408                                         GB_ADDR_CONFIG,
1409                                         NUM_RB_PER_SE);
1410         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1411                         REG_GET_FIELD(
1412                                         adev->gfx.config.gb_addr_config,
1413                                         GB_ADDR_CONFIG,
1414                                         NUM_SHADER_ENGINES);
1415         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1416                         REG_GET_FIELD(
1417                                         adev->gfx.config.gb_addr_config,
1418                                         GB_ADDR_CONFIG,
1419                                         PIPE_INTERLEAVE_SIZE));
1420
1421         return 0;
1422 }
1423
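/*
 * Allocate one NGG buffer in VRAM; size_se is the per-shader-engine size
 * (0 selects default_size_se) and is scaled by the number of SEs.
 */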
1424 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1425                                    struct amdgpu_ngg_buf *ngg_buf,
1426                                    int size_se,
1427                                    int default_size_se)
1428 {
1429         int r;
1430
1431         if (size_se < 0) {
1432                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1433                 return -EINVAL;
1434         }
1435         size_se = size_se ? size_se : default_size_se;
1436
1437         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1438         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1439                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1440                                     &ngg_buf->bo,
1441                                     &ngg_buf->gpu_addr,
1442                                     NULL);
1443         if (r) {
1444                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1445                 return r;
1446         }
1447         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1448
1449         return r;
1450 }
1451
1452 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1453 {
1454         int i;
1455
1456         for (i = 0; i < NGG_BUF_MAX; i++)
1457                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1458                                       &adev->gfx.ngg.buf[i].gpu_addr,
1459                                       NULL);
1460
1461         memset(&adev->gfx.ngg.buf[0], 0,
1462                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1463
1464         adev->gfx.ngg.init = false;
1465
1466         return 0;
1467 }
1468
1469 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1470 {
1471         int r;
1472
1473         if (!amdgpu_ngg || adev->gfx.ngg.init)
1474                 return 0;
1475
1476         /* GDS reserve memory: 64 bytes alignment */
1477         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1478         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1479         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1480         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1481
1482         /* Primitive Buffer */
1483         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1484                                     amdgpu_prim_buf_per_se,
1485                                     64 * 1024);
1486         if (r) {
1487                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1488                 goto err;
1489         }
1490
1491         /* Position Buffer */
1492         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1493                                     amdgpu_pos_buf_per_se,
1494                                     256 * 1024);
1495         if (r) {
1496                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1497                 goto err;
1498         }
1499
1500         /* Control Sideband */
1501         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1502                                     amdgpu_cntl_sb_buf_per_se,
1503                                     256);
1504         if (r) {
1505                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1506                 goto err;
1507         }
1508
1509         /* Parameter Cache, not created by default */
1510         if (amdgpu_param_buf_per_se <= 0)
1511                 goto out;
1512
1513         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1514                                     amdgpu_param_buf_per_se,
1515                                     512 * 1024);
1516         if (r) {
1517                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1518                 goto err;
1519         }
1520
1521 out:
1522         adev->gfx.ngg.init = true;
1523         return 0;
1524 err:
1525         gfx_v9_0_ngg_fini(adev);
1526         return r;
1527 }
1528
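/*
 * Enable NGG: program the WD buffer sizes and base addresses, then clear
 * the GDS memory reserved for NGG with a DMA_DATA packet on the gfx ring.
 */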
1529 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1530 {
1531         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1532         int r;
1533         u32 data, base;
1534
1535         if (!amdgpu_ngg)
1536                 return 0;
1537
1538         /* Program buffer size */
1539         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1540                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1541         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1542                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1543         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1544
1545         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1546                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1547         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1548                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1549         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1550
1551         /* Program buffer base address */
1552         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1553         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1554         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1555
1556         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1557         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1558         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1559
1560         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1561         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1562         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1563
1564         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1565         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1566         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1567
1568         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1569         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1570         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1571
1572         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1573         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1574         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1575
1576         /* Clear GDS reserved memory */
1577         r = amdgpu_ring_alloc(ring, 17);
1578         if (r) {
1579                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1580                           ring->name, r);
1581                 return r;
1582         }
1583
1584         gfx_v9_0_write_data_to_reg(ring, 0, false,
1585                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1586                                    (adev->gds.gds_size +
1587                                     adev->gfx.ngg.gds_reserve_size));
1588
1589         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1590         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1591                                 PACKET3_DMA_DATA_DST_SEL(1) |
1592                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1593         amdgpu_ring_write(ring, 0);
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1596         amdgpu_ring_write(ring, 0);
1597         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1598                                 adev->gfx.ngg.gds_reserve_size);
1599
1600         gfx_v9_0_write_data_to_reg(ring, 0, false,
1601                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1602
1603         amdgpu_ring_commit(ring);
1604
1605         return 0;
1606 }
1607
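/*
 * Set up one compute ring: doorbell index, an EOP area carved out of the
 * shared HPD BO (GFX9_MEC_HPD_SIZE bytes per ring) and its EOP IRQ source.
 */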
1608 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1609                                       int mec, int pipe, int queue)
1610 {
1611         int r;
1612         unsigned irq_type;
1613         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1616
1617         /* mec0 is me1 */
1618         ring->me = mec + 1;
1619         ring->pipe = pipe;
1620         ring->queue = queue;
1621
1622         ring->ring_obj = NULL;
1623         ring->use_doorbell = true;
1624         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1625         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1626                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1627         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1628
1629         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1630                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1631                 + ring->pipe;
1632
1633         /* type-2 packets are deprecated on MEC, use type-3 instead */
1634         r = amdgpu_ring_init(adev, ring, 1024,
1635                              &adev->gfx.eop_irq, irq_type);
1636         if (r)
1637                 return r;
1638
1640         return 0;
1641 }
1642
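/*
 * IP sw_init: register the CP interrupt sources, load microcode, create
 * the RLC/MEC BOs, then create the gfx ring, the compute rings (spread
 * across pipes), the KIQ ring and the MQD backing store.
 */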
1643 static int gfx_v9_0_sw_init(void *handle)
1644 {
1645         int i, j, k, r, ring_id;
1646         struct amdgpu_ring *ring;
1647         struct amdgpu_kiq *kiq;
1648         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1649
1650         switch (adev->asic_type) {
1651         case CHIP_VEGA10:
1652         case CHIP_VEGA12:
1653         case CHIP_VEGA20:
1654         case CHIP_RAVEN:
1655                 adev->gfx.mec.num_mec = 2;
1656                 break;
1657         default:
1658                 adev->gfx.mec.num_mec = 1;
1659                 break;
1660         }
1661
1662         adev->gfx.mec.num_pipe_per_mec = 4;
1663         adev->gfx.mec.num_queue_per_pipe = 8;
1664
1665         /* EOP Event */
1666         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1667         if (r)
1668                 return r;
1669
1670         /* Privileged reg */
1671         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1672                               &adev->gfx.priv_reg_irq);
1673         if (r)
1674                 return r;
1675
1676         /* Privileged inst */
1677         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1678                               &adev->gfx.priv_inst_irq);
1679         if (r)
1680                 return r;
1681
1682         /* ECC error */
1683         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1684                               &adev->gfx.cp_ecc_error_irq);
1685         if (r)
1686                 return r;
1687
1688         /* FUE error */
1689         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1690                               &adev->gfx.cp_ecc_error_irq);
1691         if (r)
1692                 return r;
1693
1694         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1695
1696         gfx_v9_0_scratch_init(adev);
1697
1698         r = gfx_v9_0_init_microcode(adev);
1699         if (r) {
1700                 DRM_ERROR("Failed to load gfx firmware!\n");
1701                 return r;
1702         }
1703
1704         r = adev->gfx.rlc.funcs->init(adev);
1705         if (r) {
1706                 DRM_ERROR("Failed to init rlc BOs!\n");
1707                 return r;
1708         }
1709
1710         r = gfx_v9_0_mec_init(adev);
1711         if (r) {
1712                 DRM_ERROR("Failed to init MEC BOs!\n");
1713                 return r;
1714         }
1715
1716         /* set up the gfx ring */
1717         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1718                 ring = &adev->gfx.gfx_ring[i];
1719                 ring->ring_obj = NULL;
1720                 if (!i)
1721                         sprintf(ring->name, "gfx");
1722                 else
1723                         sprintf(ring->name, "gfx_%d", i);
1724                 ring->use_doorbell = true;
1725                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1726                 r = amdgpu_ring_init(adev, ring, 1024,
1727                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1728                 if (r)
1729                         return r;
1730         }
1731
1732         /* set up the compute queues - allocate horizontally across pipes */
1733         ring_id = 0;
1734         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1735                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1736                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1737                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1738                                         continue;
1739
1740                                 r = gfx_v9_0_compute_ring_init(adev,
1741                                                                ring_id,
1742                                                                i, k, j);
1743                                 if (r)
1744                                         return r;
1745
1746                                 ring_id++;
1747                         }
1748                 }
1749         }
1750
1751         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1752         if (r) {
1753                 DRM_ERROR("Failed to init KIQ BOs!\n");
1754                 return r;
1755         }
1756
1757         kiq = &adev->gfx.kiq;
1758         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1759         if (r)
1760                 return r;
1761
1762         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1763         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1764         if (r)
1765                 return r;
1766
1767         adev->gfx.ce_ram_size = 0x8000;
1768
1769         r = gfx_v9_0_gpu_early_init(adev);
1770         if (r)
1771                 return r;
1772
1773         r = gfx_v9_0_ngg_init(adev);
1774         if (r)
1775                 return r;
1776
1777         return 0;
1778 }
1779
1780
1781 static int gfx_v9_0_sw_fini(void *handle)
1782 {
1783         int i;
1784         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1785
1786         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1787                         adev->gfx.ras_if) {
1788                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1789                 struct ras_ih_if ih_info = {
1790                         .head = *ras_if,
1791                 };
1792
1793                 amdgpu_ras_debugfs_remove(adev, ras_if);
1794                 amdgpu_ras_sysfs_remove(adev, ras_if);
1795                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1796                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1797                 kfree(ras_if);
1798         }
1799
1800         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1801                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1802         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1803                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1804
1805         amdgpu_gfx_compute_mqd_sw_fini(adev);
1806         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1807         amdgpu_gfx_kiq_fini(adev);
1808
1809         gfx_v9_0_mec_fini(adev);
1810         gfx_v9_0_ngg_fini(adev);
1811         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1812         if (adev->asic_type == CHIP_RAVEN) {
1813                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1814                                 &adev->gfx.rlc.cp_table_gpu_addr,
1815                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1816         }
1817         gfx_v9_0_free_microcode(adev);
1818
1819         return 0;
1820 }
1821
1822
1823 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1824 {
1825         /* TODO */
1826 }
1827
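/*
 * Program GRBM_GFX_INDEX so register accesses target a single SE/SH/
 * instance, or broadcast to all of them when 0xffffffff is passed.
 */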
1828 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1829 {
1830         u32 data;
1831
1832         if (instance == 0xffffffff)
1833                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1834         else
1835                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1836
1837         if (se_num == 0xffffffff)
1838                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1839         else
1840                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1841
1842         if (sh_num == 0xffffffff)
1843                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1844         else
1845                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1846
1847         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1848 }
1849
1850 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1851 {
1852         u32 data, mask;
1853
1854         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1855         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1856
1857         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1858         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1859
1860         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1861                                          adev->gfx.config.max_sh_per_se);
1862
1863         return (~data) & mask;
1864 }
1865
1866 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1867 {
1868         int i, j;
1869         u32 data;
1870         u32 active_rbs = 0;
1871         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1872                                         adev->gfx.config.max_sh_per_se;
1873
1874         mutex_lock(&adev->grbm_idx_mutex);
1875         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1876                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1877                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1878                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1879                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1880                                                rb_bitmap_width_per_sh);
1881                 }
1882         }
1883         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1884         mutex_unlock(&adev->grbm_idx_mutex);
1885
1886         adev->gfx.config.backend_enable_mask = active_rbs;
1887         adev->gfx.config.num_rbs = hweight32(active_rbs);
1888 }
1889
1890 #define DEFAULT_SH_MEM_BASES    (0x6000)
1891 #define FIRST_COMPUTE_VMID      (8)
1892 #define LAST_COMPUTE_VMID       (16)
1893 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1894 {
1895         int i;
1896         uint32_t sh_mem_config;
1897         uint32_t sh_mem_bases;
1898
1899         /*
1900          * Configure apertures:
1901          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1902          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1903          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1904          */
1905         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1906
1907         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1908                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1909                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1910
1911         mutex_lock(&adev->srbm_mutex);
1912         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1913                 soc15_grbm_select(adev, 0, 0, 0, i);
1914                 /* CP and shaders */
1915                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1917         }
1918         soc15_grbm_select(adev, 0, 0, 0, 0);
1919         mutex_unlock(&adev->srbm_mutex);
1920 }
1921
1922 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1923 {
1924         u32 tmp;
1925         int i;
1926
1927         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1928
1929         gfx_v9_0_tiling_mode_table_init(adev);
1930
1931         gfx_v9_0_setup_rb(adev);
1932         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1933         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1934
1935         /* XXX SH_MEM regs */
1936         /* where to put LDS, scratch, GPUVM in FSA64 space */
1937         mutex_lock(&adev->srbm_mutex);
1938         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1939                 soc15_grbm_select(adev, 0, 0, 0, i);
1940                 /* CP and shaders */
1941                 if (i == 0) {
1942                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1943                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1944                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1945                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1946                 } else {
1947                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1948                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1949                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1950                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1951                                 (adev->gmc.private_aperture_start >> 48));
1952                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1953                                 (adev->gmc.shared_aperture_start >> 48));
1954                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1955                 }
1956         }
1957         soc15_grbm_select(adev, 0, 0, 0, 0);
1958
1959         mutex_unlock(&adev->srbm_mutex);
1960
1961         gfx_v9_0_init_compute_vmid(adev);
1962
1963         mutex_lock(&adev->grbm_idx_mutex);
1964         /*
1965          * make sure that the following register writes are broadcast
1966          * to all the shaders
1967          */
1968         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1969
1970         WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1971                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
1972                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1973                    (adev->gfx.config.sc_prim_fifo_size_backend <<
1974                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1975                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
1976                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1977                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1978                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1979         mutex_unlock(&adev->grbm_idx_mutex);
1980
1981 }
1982
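/* poll each SE/SH until the RLC serdes CU masters go idle, then wait for
 * the non-CU (SE/GC/TC) masters as well */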
1983 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1984 {
1985         u32 i, j, k;
1986         u32 mask;
1987
1988         mutex_lock(&adev->grbm_idx_mutex);
1989         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1990                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1991                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1992                         for (k = 0; k < adev->usec_timeout; k++) {
1993                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1994                                         break;
1995                                 udelay(1);
1996                         }
1997                         if (k == adev->usec_timeout) {
1998                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1999                                                       0xffffffff, 0xffffffff);
2000                                 mutex_unlock(&adev->grbm_idx_mutex);
2001                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2002                                          i, j);
2003                                 return;
2004                         }
2005                 }
2006         }
2007         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2008         mutex_unlock(&adev->grbm_idx_mutex);
2009
2010         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2011                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2012                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2013                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2014         for (k = 0; k < adev->usec_timeout; k++) {
2015                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2016                         break;
2017                 udelay(1);
2018         }
2019 }
2020
2021 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2022                                                bool enable)
2023 {
2024         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2025
2026         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2027         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2028         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2029         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2030
2031         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2032 }
2033
2034 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2035 {
2036         /* csib */
2037         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2038                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2039         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2040                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2041         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2042                         adev->gfx.rlc.clear_state_size);
2043 }
2044
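/*
 * Walk the RLC register_list_format blob from indirect_offset onward.
 * As parsed here, each indirect block is a run of 3-dword entries (two
 * payload dwords followed by an indirect register offset) terminated by
 * 0xFFFFFFFF; record where each block starts and collect the unique
 * indirect register offsets.
 */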
2045 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2046                                 int indirect_offset,
2047                                 int list_size,
2048                                 int *unique_indirect_regs,
2049                                 int unique_indirect_reg_count,
2050                                 int *indirect_start_offsets,
2051                                 int *indirect_start_offsets_count,
2052                                 int max_start_offsets_count)
2053 {
2054         int idx;
2055
2056         for (; indirect_offset < list_size; indirect_offset++) {
2057                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2058                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2059                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2060
2061                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2062                         indirect_offset += 2;
2063
2064                         /* look for the matching index */
2065                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2066                                 if (unique_indirect_regs[idx] ==
2067                                         register_list_format[indirect_offset] ||
2068                                         !unique_indirect_regs[idx])
2069                                         break;
2070                         }
2071
2072                         BUG_ON(idx >= unique_indirect_reg_count);
2073
2074                         if (!unique_indirect_regs[idx])
2075                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2076
2077                         indirect_offset++;
2078                 }
2079         }
2080 }
2081
2082 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2083 {
2084         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2085         int unique_indirect_reg_count = 0;
2086
2087         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2088         int indirect_start_offsets_count = 0;
2089
2090         int list_size = 0;
2091         int i = 0, j = 0;
2092         u32 tmp = 0;
2093
2094         u32 *register_list_format =
2095                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096         if (!register_list_format)
2097                 return -ENOMEM;
2098         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2099                 adev->gfx.rlc.reg_list_format_size_bytes);
2100
2101         /* setup unique_indirect_regs array and indirect_start_offsets array */
2102         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2103         gfx_v9_1_parse_ind_reg_list(register_list_format,
2104                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2105                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2106                                     unique_indirect_regs,
2107                                     unique_indirect_reg_count,
2108                                     indirect_start_offsets,
2109                                     &indirect_start_offsets_count,
2110                                     ARRAY_SIZE(indirect_start_offsets));
2111
2112         /* enable auto inc in case it is disabled */
2113         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2114         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2115         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2116
2117         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2118         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2119                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2120         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2121                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2122                         adev->gfx.rlc.register_restore[i]);
2123
2124         /* load indirect register */
2125         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2126                 adev->gfx.rlc.reg_list_format_start);
2127
2128         /* direct register portion */
2129         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2130                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2131                         register_list_format[i]);
2132
2133         /* indirect register portion */
2134         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2135                 if (register_list_format[i] == 0xFFFFFFFF) {
2136                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2137                         continue;
2138                 }
2139
2140                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2141                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2142
2143                 for (j = 0; j < unique_indirect_reg_count; j++) {
2144                         if (register_list_format[i] == unique_indirect_regs[j]) {
2145                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2146                                 break;
2147                         }
2148                 }
2149
2150                 BUG_ON(j >= unique_indirect_reg_count);
2151
2152                 i++;
2153         }
2154
2155         /* set save/restore list size */
2156         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2157         list_size = list_size >> 1;
2158         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2159                 adev->gfx.rlc.reg_restore_list_size);
2160         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2161
2162         /* write the starting offsets to RLC scratch ram */
2163         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2164                 adev->gfx.rlc.starting_offsets_start);
2165         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2166                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2167                        indirect_start_offsets[i]);
2168
2169         /* load unique indirect regs */
2170         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2171                 if (unique_indirect_regs[i] != 0) {
2172                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2173                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2174                                unique_indirect_regs[i] & 0x3FFFF);
2175
2176                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2177                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2178                                unique_indirect_regs[i] >> 20);
2179                 }
2180         }
2181
2182         kfree(register_list_format);
2183         return 0;
2184 }
2185
2186 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2187 {
2188         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2189 }
2190
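/*
 * Hand control of CGPG (coarse-grain power gating) to the GFX IP through
 * the PWR block; the GFXOFF status field is also updated when enabling.
 */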
2191 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2192                                              bool enable)
2193 {
2194         uint32_t data = 0;
2195         uint32_t default_data = 0;
2196
2197         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2198         if (enable) {
2199                 /* enable GFXIP control over CGPG */
2200                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2201                 if (default_data != data)
2202                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2203
2204                 /* update status */
2205                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2206                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2207                 if (default_data != data)
2208                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2209         } else {
2210                 /* restore GFXIP control over CGPG */
2211                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2212                 if (default_data != data)
2213                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2214         }
2215 }
2216
2217 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2218 {
2219         uint32_t data = 0;
2220
2221         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2222                               AMD_PG_SUPPORT_GFX_SMG |
2223                               AMD_PG_SUPPORT_GFX_DMG)) {
2224                 /* init IDLE_POLL_COUNT = 60 */
2225                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2226                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2227                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2228                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2229
2230                 /* init RLC PG Delay */
2231                 data = 0;
2232                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2233                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2234                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2235                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2236                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2237
2238                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2239                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2240                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2241                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2242
2243                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2244                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2245                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2246                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2247
2248                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2249                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2250
2251                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2252                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2253                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2254
2255                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2256         }
2257 }
2258
2259 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2260                                                 bool enable)
2261 {
2262         uint32_t data = 0;
2263         uint32_t default_data = 0;
2264
2265         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2266         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2267                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2268                              enable ? 1 : 0);
2269         if (default_data != data)
2270                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2271 }
2272
2273 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2274                                                 bool enable)
2275 {
2276         uint32_t data = 0;
2277         uint32_t default_data = 0;
2278
2279         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2280         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2281                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2282                              enable ? 1 : 0);
2283         if (default_data != data)
2284                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2285 }
2286
2287 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2288                                         bool enable)
2289 {
2290         uint32_t data = 0;
2291         uint32_t default_data = 0;
2292
2293         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2294         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2295                              CP_PG_DISABLE,
2296                              enable ? 0 : 1);
2297         if (default_data != data)
2298                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2299 }
2300
2301 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2302                                                 bool enable)
2303 {
2304         uint32_t data, default_data;
2305
2306         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2307         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2308                              GFX_POWER_GATING_ENABLE,
2309                              enable ? 1 : 0);
2310         if (default_data != data)
2311                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2312 }
2313
2314 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2315                                                 bool enable)
2316 {
2317         uint32_t data, default_data;
2318
2319         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2320         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2321                              GFX_PIPELINE_PG_ENABLE,
2322                              enable ? 1 : 0);
2323         if (default_data != data)
2324                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2325
2326         if (!enable)
2327                 /* read any GFX register to wake up GFX */
2328                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2329 }
2330
2331 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2332                                                        bool enable)
2333 {
2334         uint32_t data, default_data;
2335
2336         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2337         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2338                              STATIC_PER_CU_PG_ENABLE,
2339                              enable ? 1 : 0);
2340         if (default_data != data)
2341                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2342 }
2343
2344 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2345                                                 bool enable)
2346 {
2347         uint32_t data, default_data;
2348
2349         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2350         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2351                              DYN_PER_CU_PG_ENABLE,
2352                              enable ? 1 : 0);
2353         if (default_data != data)
2354                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2355 }
2356
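/* power gating init: CSB, RLC save/restore list (v2.1+) and GFX PG setup */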
2357 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2358 {
2359         gfx_v9_0_init_csb(adev);
2360
2361         /*
2362          * The RLC save/restore list is only available since RLC v2.1,
2363          * and is needed by the gfxoff feature.
2364          */
2365         if (adev->gfx.rlc.is_rlc_v2_1) {
2366                 gfx_v9_1_init_rlc_save_restore_list(adev);
2367                 gfx_v9_0_enable_save_restore_machine(adev);
2368         }
2369
2370         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2371                               AMD_PG_SUPPORT_GFX_SMG |
2372                               AMD_PG_SUPPORT_GFX_DMG |
2373                               AMD_PG_SUPPORT_CP |
2374                               AMD_PG_SUPPORT_GDS |
2375                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2376                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2377                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2378                 gfx_v9_0_init_gfx_power_gating(adev);
2379         }
2380 }
2381
2382 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2383 {
2384         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2385         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2386         gfx_v9_0_wait_for_rlc_serdes(adev);
2387 }
2388
2389 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2390 {
2391         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2392         udelay(50);
2393         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2394         udelay(50);
2395 }
2396
2397 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2398 {
2399 #ifdef AMDGPU_RLC_DEBUG_RETRY
2400         u32 rlc_ucode_ver;
2401 #endif
2402
2403         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2404         udelay(50);
2405
2406         /* APUs (e.g. carrizo) only enable the cp interrupt after the cp is initialized */
2407         if (!(adev->flags & AMD_IS_APU)) {
2408                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2409                 udelay(50);
2410         }
2411
2412 #ifdef AMDGPU_RLC_DEBUG_RETRY
2413         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2414         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2415         if (rlc_ucode_ver == 0x108) {
2416                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2417                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2418                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2419                  * default is 0x9C4 to create a 100us interval */
2420                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2421                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2422                  * to disable the page fault retry interrupts, default is
2423                  * 0x100 (256) */
2424                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2425         }
2426 #endif
2427 }
2428
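/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode words through the
 * RLC_GPM_UCODE_ADDR/DATA register pair, then write the firmware version
 * to the ADDR register to finish the load.
 */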
2429 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2430 {
2431         const struct rlc_firmware_header_v2_0 *hdr;
2432         const __le32 *fw_data;
2433         unsigned i, fw_size;
2434
2435         if (!adev->gfx.rlc_fw)
2436                 return -EINVAL;
2437
2438         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2439         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2440
2441         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2442                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2443         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2444
2445         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2446                         RLCG_UCODE_LOADING_START_ADDRESS);
2447         for (i = 0; i < fw_size; i++)
2448                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2449         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2450
2451         return 0;
2452 }
2453
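/*
 * Bring the RLC back up: stop it, disable CGCG/CGLS clock gating,
 * reinitialize power gating, load the microcode when the PSP isn't doing
 * the firmware loading, apply the per-ASIC LBPW policy, then restart it.
 * SRIOV VFs only need the clear-state buffer reinitialized.
 */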
2454 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2455 {
2456         int r;
2457
2458         if (amdgpu_sriov_vf(adev)) {
2459                 gfx_v9_0_init_csb(adev);
2460                 return 0;
2461         }
2462
2463         adev->gfx.rlc.funcs->stop(adev);
2464
2465         /* disable CG */
2466         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2467
2468         gfx_v9_0_init_pg(adev);
2469
2470         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2471                 /* legacy rlc firmware loading */
2472                 r = gfx_v9_0_rlc_load_microcode(adev);
2473                 if (r)
2474                         return r;
2475         }
2476
2477         switch (adev->asic_type) {
2478         case CHIP_RAVEN:
2479                 if (amdgpu_lbpw == 0)
2480                         gfx_v9_0_enable_lbpw(adev, false);
2481                 else
2482                         gfx_v9_0_enable_lbpw(adev, true);
2483                 break;
2484         case CHIP_VEGA20:
2485                 if (amdgpu_lbpw > 0)
2486                         gfx_v9_0_enable_lbpw(adev, true);
2487                 else
2488                         gfx_v9_0_enable_lbpw(adev, false);
2489                 break;
2490         default:
2491                 break;
2492         }
2493
2494         adev->gfx.rlc.funcs->start(adev);
2495
2496         return 0;
2497 }
2498
2499 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2500 {
2501         int i;
2502         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2503
2504         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2505         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2506         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2507         if (!enable) {
2508                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2509                         adev->gfx.gfx_ring[i].sched.ready = false;
2510         }
2511         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2512         udelay(50);
2513 }
2514
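/*
 * Legacy load of the three gfx CP microcode images (PFP, CE, ME): halt the
 * CP first, stream each image through its UCODE_ADDR/DATA (or
 * RAM_WADDR/DATA) pair, and finish each with the firmware version write.
 */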
2515 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2516 {
2517         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2518         const struct gfx_firmware_header_v1_0 *ce_hdr;
2519         const struct gfx_firmware_header_v1_0 *me_hdr;
2520         const __le32 *fw_data;
2521         unsigned i, fw_size;
2522
2523         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2524                 return -EINVAL;
2525
2526         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2527                 adev->gfx.pfp_fw->data;
2528         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2529                 adev->gfx.ce_fw->data;
2530         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2531                 adev->gfx.me_fw->data;
2532
2533         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2534         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2535         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2536
2537         gfx_v9_0_cp_gfx_enable(adev, false);
2538
2539         /* PFP */
2540         fw_data = (const __le32 *)
2541                 (adev->gfx.pfp_fw->data +
2542                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2543         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2544         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2545         for (i = 0; i < fw_size; i++)
2546                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2547         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2548
2549         /* CE */
2550         fw_data = (const __le32 *)
2551                 (adev->gfx.ce_fw->data +
2552                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2553         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2554         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2555         for (i = 0; i < fw_size; i++)
2556                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2557         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2558
2559         /* ME */
2560         fw_data = (const __le32 *)
2561                 (adev->gfx.me_fw->data +
2562                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2563         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2564         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2565         for (i = 0; i < fw_size; i++)
2566                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2567         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2568
2569         return 0;
2570 }
2571
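/*
 * Prime the gfx CP: program the context count and device id, un-halt the
 * CP, then emit the clear-state buffer (PREAMBLE/SET_CONTEXT_REG packets),
 * the CE partition bases and the VGT index type on ring 0.
 */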
2572 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2573 {
2574         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2575         const struct cs_section_def *sect = NULL;
2576         const struct cs_extent_def *ext = NULL;
2577         int r, i, tmp;
2578
2579         /* init the CP */
2580         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2581         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2582
2583         gfx_v9_0_cp_gfx_enable(adev, true);
2584
2585         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2586         if (r) {
2587                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2588                 return r;
2589         }
2590
2591         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2592         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2593
2594         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2595         amdgpu_ring_write(ring, 0x80000000);
2596         amdgpu_ring_write(ring, 0x80000000);
2597
2598         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2599                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2600                         if (sect->id == SECT_CONTEXT) {
2601                                 amdgpu_ring_write(ring,
2602                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2603                                                ext->reg_count));
2604                                 amdgpu_ring_write(ring,
2605                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2606                                 for (i = 0; i < ext->reg_count; i++)
2607                                         amdgpu_ring_write(ring, ext->extent[i]);
2608                         }
2609                 }
2610         }
2611
2612         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2613         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2614
2615         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2616         amdgpu_ring_write(ring, 0);
2617
2618         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2619         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2620         amdgpu_ring_write(ring, 0x8000);
2621         amdgpu_ring_write(ring, 0x8000);
2622
2623         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2624         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2625                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2626         amdgpu_ring_write(ring, tmp);
2627         amdgpu_ring_write(ring, 0);
2628
2629         amdgpu_ring_commit(ring);
2630
2631         return 0;
2632 }
2633
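/*
 * Program the gfx ring buffer hardware state: buffer size, read/write
 * pointers and their writeback addresses, ring base address and doorbell
 * offset/range, then kick the ring via gfx_v9_0_cp_gfx_start().
 */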
2634 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2635 {
2636         struct amdgpu_ring *ring;
2637         u32 tmp;
2638         u32 rb_bufsz;
2639         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2640
2641         /* Set the write pointer delay */
2642         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2643
2644         /* set the RB to use vmid 0 */
2645         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2646
2647         /* Set ring buffer size */
2648         ring = &adev->gfx.gfx_ring[0];
2649         rb_bufsz = order_base_2(ring->ring_size / 8);
2650         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2651         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2652 #ifdef __BIG_ENDIAN
2653         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2654 #endif
2655         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2656
2657         /* Initialize the ring buffer's write pointers */
2658         ring->wptr = 0;
2659         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2660         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2661
2662         /* set the wb address whether it's enabled or not */
2663         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2664         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2665         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2666
2667         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2668         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2669         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2670
2671         mdelay(1);
2672         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2673
2674         rb_addr = ring->gpu_addr >> 8;
2675         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2676         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2677
2678         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2679         if (ring->use_doorbell) {
2680                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681                                     DOORBELL_OFFSET, ring->doorbell_index);
2682                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2683                                     DOORBELL_EN, 1);
2684         } else {
2685                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2686         }
2687         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2688
2689         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2690                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2691         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2692
2693         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2694                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2695
2696
2697         /* start the ring */
2698         gfx_v9_0_cp_gfx_start(adev);
2699         ring->sched.ready = true;
2700
2701         return 0;
2702 }
2703
2704 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2705 {
2706         int i;
2707
2708         if (enable) {
2709                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2710         } else {
2711                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2712                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2713                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2714                         adev->gfx.compute_ring[i].sched.ready = false;
2715                 adev->gfx.kiq.ring.sched.ready = false;
2716         }
2717         udelay(50);
2718 }
2719
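/*
 * Legacy MEC microcode load: point the CPC instruction cache at the MEC
 * firmware BO, then write the jump table through the ME1 UCODE ADDR/DATA
 * register pair and finish with the firmware version write.
 */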
2720 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2721 {
2722         const struct gfx_firmware_header_v1_0 *mec_hdr;
2723         const __le32 *fw_data;
2724         unsigned i;
2725         u32 tmp;
2726
2727         if (!adev->gfx.mec_fw)
2728                 return -EINVAL;
2729
2730         gfx_v9_0_cp_compute_enable(adev, false);
2731
2732         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2733         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2734
2735         fw_data = (const __le32 *)
2736                 (adev->gfx.mec_fw->data +
2737                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2738         tmp = 0;
2739         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2740         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2741         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2742
2743         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2744                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2745         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2746                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2747
2748         /* MEC1 */
2749         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750                          mec_hdr->jt_offset);
2751         for (i = 0; i < mec_hdr->jt_size; i++)
2752                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2753                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2754
2755         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2756                         adev->gfx.mec_fw_version);
2757         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2758
2759         return 0;
2760 }
2761
2762 /* KIQ functions */
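/*
 * Tell the RLC which hardware queue is the KIQ by packing me/pipe/queue
 * into the low byte of RLC_CP_SCHEDULERS (queue in bits 0-2, pipe in bits
 * 3-4, me from bit 5), then setting bit 7 (presumably a valid/enable flag)
 * with a second write.
 */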
2763 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2764 {
2765         uint32_t tmp;
2766         struct amdgpu_device *adev = ring->adev;
2767
2768         /* tell RLC which is KIQ queue */
2769         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2770         tmp &= 0xffffff00;
2771         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2772         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2773         tmp |= 0x80;
2774         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2775 }
2776
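/*
 * Use the KIQ to bring the KCQs online: build a bitmask of the compute
 * queues owned by the driver, hand it to the CP in a SET_RESOURCES packet,
 * then emit one MAP_QUEUES packet per compute ring carrying its MQD
 * address, wptr writeback address and doorbell offset.
 */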
2777 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2778 {
2779         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2780         uint64_t queue_mask = 0;
2781         int r, i;
2782
2783         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2784                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2785                         continue;
2786
2787                 /* This situation may be hit in the future if a new HW
2788                  * generation exposes more than 64 queues. If so, the
2789                  * definition of queue_mask needs updating */
2790                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2791                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2792                         break;
2793                 }
2794
2795                 queue_mask |= (1ull << i);
2796         }
2797
2798         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2799         if (r) {
2800                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2801                 return r;
2802         }
2803
2804         /* set resources */
2805         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2806         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2807                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2808         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2809         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2810         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2811         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2812         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2813         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2814         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2815                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2816                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2817                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2818
2819                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2820                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2821                 amdgpu_ring_write(kiq_ring,
2822                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2823                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2824                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2825                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2826                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2827                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2828                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2829                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2830                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2831                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2832                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2833                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2834                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2835                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2836         }
2837
2838         r = amdgpu_ring_test_helper(kiq_ring);
2839         if (r)
2840                 DRM_ERROR("KCQ enable failed\n");
2841
2842         return r;
2843 }
2844
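/*
 * Fill the v9 MQD (memory queue descriptor) for a compute queue from the
 * ring state: EOP buffer, doorbell control, MQD/HQD base addresses, queue
 * size and rptr/wptr writeback addresses. The CP consumes this structure
 * when the queue is mapped.
 */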
2845 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2846 {
2847         struct amdgpu_device *adev = ring->adev;
2848         struct v9_mqd *mqd = ring->mqd_ptr;
2849         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2850         uint32_t tmp;
2851
2852         mqd->header = 0xC0310800;
2853         mqd->compute_pipelinestat_enable = 0x00000001;
2854         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2855         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2856         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2857         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2858         mqd->compute_misc_reserved = 0x00000003;
2859
2860         mqd->dynamic_cu_mask_addr_lo =
2861                 lower_32_bits(ring->mqd_gpu_addr
2862                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2863         mqd->dynamic_cu_mask_addr_hi =
2864                 upper_32_bits(ring->mqd_gpu_addr
2865                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2866
2867         eop_base_addr = ring->eop_gpu_addr >> 8;
2868         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2869         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2870
2871         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2872         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2873         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2874                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
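        /* e.g. with GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords:
         * order_base_2(1024) - 1 = 9 and 2^(9+1) = 1024 dwords = 4096 bytes.
         */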
2875
2876         mqd->cp_hqd_eop_control = tmp;
2877
2878         /* enable doorbell? */
2879         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2880
2881         if (ring->use_doorbell) {
2882                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883                                     DOORBELL_OFFSET, ring->doorbell_index);
2884                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885                                     DOORBELL_EN, 1);
2886                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887                                     DOORBELL_SOURCE, 0);
2888                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2889                                     DOORBELL_HIT, 0);
2890         } else {
2891                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2892                                          DOORBELL_EN, 0);
2893         }
2894
2895         mqd->cp_hqd_pq_doorbell_control = tmp;
2896
2897         /* disable the queue if it's active */
2898         ring->wptr = 0;
2899         mqd->cp_hqd_dequeue_request = 0;
2900         mqd->cp_hqd_pq_rptr = 0;
2901         mqd->cp_hqd_pq_wptr_lo = 0;
2902         mqd->cp_hqd_pq_wptr_hi = 0;
2903
2904         /* set the pointer to the MQD */
2905         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2906         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2907
2908         /* set MQD vmid to 0 */
2909         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2910         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2911         mqd->cp_mqd_control = tmp;
2912
2913         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2914         hqd_gpu_addr = ring->gpu_addr >> 8;
2915         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2916         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2917
2918         /* set up the HQD, this is similar to CP_RB0_CNTL */
2919         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2920         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2921                             (order_base_2(ring->ring_size / 4) - 1));
2922         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2923                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2924 #ifdef __BIG_ENDIAN
2925         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2926 #endif
2927         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2928         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2929         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2930         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2931         mqd->cp_hqd_pq_control = tmp;
2932
2933         /* set the wb address whether it's enabled or not */
2934         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2935         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2936         mqd->cp_hqd_pq_rptr_report_addr_hi =
2937                 upper_32_bits(wb_gpu_addr) & 0xffff;
2938
2939         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2940         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2941         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2942         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2943
2944         tmp = 0;
2945         /* enable the doorbell if requested */
2946         if (ring->use_doorbell) {
2947                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2948                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949                                 DOORBELL_OFFSET, ring->doorbell_index);
2950
2951                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952                                          DOORBELL_EN, 1);
2953                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954                                          DOORBELL_SOURCE, 0);
2955                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2956                                          DOORBELL_HIT, 0);
2957         }
2958
2959         mqd->cp_hqd_pq_doorbell_control = tmp;
2960
2961         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2962         ring->wptr = 0;
2963         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2964
2965         /* set the vmid for the queue */
2966         mqd->cp_hqd_vmid = 0;
2967
2968         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2969         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2970         mqd->cp_hqd_persistent_state = tmp;
2971
2972         /* set MIN_IB_AVAIL_SIZE */
2973         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2974         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2975         mqd->cp_hqd_ib_control = tmp;
2976
2977         /* activate the queue */
2978         mqd->cp_hqd_active = 1;
2979
2980         return 0;
2981 }
2982
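/*
 * Mirror the prepared MQD into the live CP_HQD_* registers for the KIQ,
 * quiescing the queue first if it is still active, and open the MEC
 * doorbell range when doorbells are in use.
 */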
2983 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2984 {
2985         struct amdgpu_device *adev = ring->adev;
2986         struct v9_mqd *mqd = ring->mqd_ptr;
2987         int j;
2988
2989         /* disable wptr polling */
2990         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2991
2992         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2993                mqd->cp_hqd_eop_base_addr_lo);
2994         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2995                mqd->cp_hqd_eop_base_addr_hi);
2996
2997         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2998         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2999                mqd->cp_hqd_eop_control);
3000
3001         /* enable doorbell? */
3002         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3003                mqd->cp_hqd_pq_doorbell_control);
3004
3005         /* disable the queue if it's active */
3006         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3007                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3008                 for (j = 0; j < adev->usec_timeout; j++) {
3009                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3010                                 break;
3011                         udelay(1);
3012                 }
3013                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3014                        mqd->cp_hqd_dequeue_request);
3015                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3016                        mqd->cp_hqd_pq_rptr);
3017                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3018                        mqd->cp_hqd_pq_wptr_lo);
3019                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3020                        mqd->cp_hqd_pq_wptr_hi);
3021         }
3022
3023         /* set the pointer to the MQD */
3024         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3025                mqd->cp_mqd_base_addr_lo);
3026         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3027                mqd->cp_mqd_base_addr_hi);
3028
3029         /* set MQD vmid to 0 */
3030         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3031                mqd->cp_mqd_control);
3032
3033         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3034         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3035                mqd->cp_hqd_pq_base_lo);
3036         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3037                mqd->cp_hqd_pq_base_hi);
3038
3039         /* set up the HQD, this is similar to CP_RB0_CNTL */
3040         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3041                mqd->cp_hqd_pq_control);
3042
3043         /* set the wb address whether it's enabled or not */
3044         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3045                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3046         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3047                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3048
3049         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3050         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3051                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3052         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3053                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3054
3055         /* enable the doorbell if requested */
3056         if (ring->use_doorbell) {
3057                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3058                                         (adev->doorbell_index.kiq * 2) << 2);
3059                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3060                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3061         }
3062
3063         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3064                mqd->cp_hqd_pq_doorbell_control);
3065
3066         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3067         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3068                mqd->cp_hqd_pq_wptr_lo);
3069         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3070                mqd->cp_hqd_pq_wptr_hi);
3071
3072         /* set the vmid for the queue */
3073         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3074
3075         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3076                mqd->cp_hqd_persistent_state);
3077
3078         /* activate the queue */
3079         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3080                mqd->cp_hqd_active);
3081
3082         if (ring->use_doorbell)
3083                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3084
3085         return 0;
3086 }
3087
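/*
 * Quiesce and scrub the KIQ hardware queue: request a dequeue, wait for
 * HQD_ACTIVE to clear (forcing it off on timeout), then zero out the HQD
 * control registers.
 */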
3088 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3089 {
3090         struct amdgpu_device *adev = ring->adev;
3091         int j;
3092
3093         /* disable the queue if it's active */
3094         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3095
3096                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3097
3098                 for (j = 0; j < adev->usec_timeout; j++) {
3099                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3100                                 break;
3101                         udelay(1);
3102                 }
3103
3104                 if (j == adev->usec_timeout) {
3105                         DRM_DEBUG("KIQ dequeue request failed.\n");
3106
3107                         /* Manual disable if dequeue request times out */
3108                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3109                 }
3110
3111                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3112                       0);
3113         }
3114
3115         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3116         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3117         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3118         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3119         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3120         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3121         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3122         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3123
3124         return 0;
3125 }
3126
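/*
 * (Re)initialize the KIQ. On GPU reset the saved MQD backup is restored
 * and only the registers are reprogrammed; on first init the MQD is built
 * from scratch and then backed up for later resets.
 */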
3127 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3128 {
3129         struct amdgpu_device *adev = ring->adev;
3130         struct v9_mqd *mqd = ring->mqd_ptr;
3131         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3132
3133         gfx_v9_0_kiq_setting(ring);
3134
3135         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3136                 /* reset MQD to a clean status */
3137                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3138                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3139
3140                 /* reset ring buffer */
3141                 ring->wptr = 0;
3142                 amdgpu_ring_clear_ring(ring);
3143
3144                 mutex_lock(&adev->srbm_mutex);
3145                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3146                 gfx_v9_0_kiq_init_register(ring);
3147                 soc15_grbm_select(adev, 0, 0, 0, 0);
3148                 mutex_unlock(&adev->srbm_mutex);
3149         } else {
3150                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3151                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3152                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3153                 mutex_lock(&adev->srbm_mutex);
3154                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3155                 gfx_v9_0_mqd_init(ring);
3156                 gfx_v9_0_kiq_init_register(ring);
3157                 soc15_grbm_select(adev, 0, 0, 0, 0);
3158                 mutex_unlock(&adev->srbm_mutex);
3159
3160                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3161                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3162         }
3163
3164         return 0;
3165 }
3166
3167 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3168 {
3169         struct amdgpu_device *adev = ring->adev;
3170         struct v9_mqd *mqd = ring->mqd_ptr;
3171         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3172
3173         if (!adev->in_gpu_reset && !adev->in_suspend) {
3174                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3175                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3176                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3177                 mutex_lock(&adev->srbm_mutex);
3178                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3179                 gfx_v9_0_mqd_init(ring);
3180                 soc15_grbm_select(adev, 0, 0, 0, 0);
3181                 mutex_unlock(&adev->srbm_mutex);
3182
3183                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3184                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3185         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3186                 /* reset MQD to a clean status */
3187                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3188                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3189
3190                 /* reset ring buffer */
3191                 ring->wptr = 0;
3192                 amdgpu_ring_clear_ring(ring);
3193         } else {
3194                 amdgpu_ring_clear_ring(ring);
3195         }
3196
3197         return 0;
3198 }
3199
3200 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3201 {
3202         struct amdgpu_ring *ring;
3203         int r;
3204
3205         ring = &adev->gfx.kiq.ring;
3206
3207         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3208         if (unlikely(r != 0))
3209                 return r;
3210
3211         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3212         if (unlikely(r != 0))
3213                 return r;
3214
3215         gfx_v9_0_kiq_init_queue(ring);
3216         amdgpu_bo_kunmap(ring->mqd_obj);
3217         ring->mqd_ptr = NULL;
3218         amdgpu_bo_unreserve(ring->mqd_obj);
3219         ring->sched.ready = true;
3220         return 0;
3221 }
3222
3223 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3224 {
3225         struct amdgpu_ring *ring = NULL;
3226         int r = 0, i;
3227
3228         gfx_v9_0_cp_compute_enable(adev, true);
3229
3230         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3231                 ring = &adev->gfx.compute_ring[i];
3232
3233                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3234                 if (unlikely(r != 0))
3235                         goto done;
3236                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3237                 if (!r) {
3238                         r = gfx_v9_0_kcq_init_queue(ring);
3239                         amdgpu_bo_kunmap(ring->mqd_obj);
3240                         ring->mqd_ptr = NULL;
3241                 }
3242                 amdgpu_bo_unreserve(ring->mqd_obj);
3243                 if (r)
3244                         goto done;
3245         }
3246
3247         r = gfx_v9_0_kiq_kcq_enable(adev);
3248 done:
3249         return r;
3250 }
3251
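/*
 * Full CP bring-up: load the gfx and compute microcode on the legacy path,
 * resume the KIQ first so it can map the compute queues, then the gfx
 * ring, then the KCQs, and finally ring-test everything.
 */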
3252 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3253 {
3254         int r, i;
3255         struct amdgpu_ring *ring;
3256
3257         if (!(adev->flags & AMD_IS_APU))
3258                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3259
3260         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3261                 /* legacy firmware loading */
3262                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3263                 if (r)
3264                         return r;
3265
3266                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3267                 if (r)
3268                         return r;
3269         }
3270
3271         r = gfx_v9_0_kiq_resume(adev);
3272         if (r)
3273                 return r;
3274
3275         r = gfx_v9_0_cp_gfx_resume(adev);
3276         if (r)
3277                 return r;
3278
3279         r = gfx_v9_0_kcq_resume(adev);
3280         if (r)
3281                 return r;
3282
3283         ring = &adev->gfx.gfx_ring[0];
3284         r = amdgpu_ring_test_helper(ring);
3285         if (r)
3286                 return r;
3287
3288         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3289                 ring = &adev->gfx.compute_ring[i];
3290                 amdgpu_ring_test_helper(ring);
3291         }
3292
3293         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3294
3295         return 0;
3296 }
3297
3298 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3299 {
3300         gfx_v9_0_cp_gfx_enable(adev, enable);
3301         gfx_v9_0_cp_compute_enable(adev, enable);
3302 }
3303
3304 static int gfx_v9_0_hw_init(void *handle)
3305 {
3306         int r;
3307         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3308
3309         gfx_v9_0_init_golden_registers(adev);
3310
3311         gfx_v9_0_constants_init(adev);
3312
3313         r = gfx_v9_0_csb_vram_pin(adev);
3314         if (r)
3315                 return r;
3316
3317         r = adev->gfx.rlc.funcs->resume(adev);
3318         if (r)
3319                 return r;
3320
3321         r = gfx_v9_0_cp_resume(adev);
3322         if (r)
3323                 return r;
3324
3325         r = gfx_v9_0_ngg_en(adev);
3326         if (r)
3327                 return r;
3328
3329         return r;
3330 }
3331
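/*
 * Tear the KCQs down through the KIQ: one UNMAP_QUEUES packet per compute
 * ring with the RESET_QUEUES action, addressed by doorbell offset.
 */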
3332 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3333 {
3334         int r, i;
3335         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3336
3337         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3338         if (r)
3339                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3340
3341         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3342                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3343
3344                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3345                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3346                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3347                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3348                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3349                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3350                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3351                 amdgpu_ring_write(kiq_ring, 0);
3352                 amdgpu_ring_write(kiq_ring, 0);
3353                 amdgpu_ring_write(kiq_ring, 0);
3354         }
3355         r = amdgpu_ring_test_helper(kiq_ring);
3356         if (r)
3357                 DRM_ERROR("KCQ disable failed\n");
3358
3359         return r;
3360 }
3361
3362 static int gfx_v9_0_hw_fini(void *handle)
3363 {
3364         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3365
3366         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3367         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3368         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3369
3370         /* disable KCQ so the CPC doesn't touch memory that is no longer valid */
3371         gfx_v9_0_kcq_disable(adev);
3372
3373         if (amdgpu_sriov_vf(adev)) {
3374                 gfx_v9_0_cp_gfx_enable(adev, false);
3375                 /* polling must be disabled for SRIOV once the hw is finished;
3376                  * otherwise the CPC engine may keep fetching a WB address that
3377                  * is already invalid after the sw is finished, triggering a DMAR
3378                  * read error on the hypervisor side.
3379                  */
3380                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3381                 return 0;
3382         }
3383
3384         /* Use the deinitialize sequence from CAIL when unbinding the device
3385          * from the driver, otherwise the KIQ hangs when binding it back.
3386          */
3387         if (!adev->in_gpu_reset && !adev->in_suspend) {
3388                 mutex_lock(&adev->srbm_mutex);
3389                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3390                                 adev->gfx.kiq.ring.pipe,
3391                                 adev->gfx.kiq.ring.queue, 0);
3392                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3393                 soc15_grbm_select(adev, 0, 0, 0, 0);
3394                 mutex_unlock(&adev->srbm_mutex);
3395         }
3396
3397         gfx_v9_0_cp_enable(adev, false);
3398         adev->gfx.rlc.funcs->stop(adev);
3399
3400         gfx_v9_0_csb_vram_unpin(adev);
3401
3402         return 0;
3403 }
3404
3405 static int gfx_v9_0_suspend(void *handle)
3406 {
3407         return gfx_v9_0_hw_fini(handle);
3408 }
3409
3410 static int gfx_v9_0_resume(void *handle)
3411 {
3412         return gfx_v9_0_hw_init(handle);
3413 }
3414
3415 static bool gfx_v9_0_is_idle(void *handle)
3416 {
3417         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3418
3419         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3420                                 GRBM_STATUS, GUI_ACTIVE))
3421                 return false;
3422         else
3423                 return true;
3424 }
3425
3426 static int gfx_v9_0_wait_for_idle(void *handle)
3427 {
3428         unsigned i;
3429         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3430
3431         for (i = 0; i < adev->usec_timeout; i++) {
3432                 if (gfx_v9_0_is_idle(handle))
3433                         return 0;
3434                 udelay(1);
3435         }
3436         return -ETIMEDOUT;
3437 }
3438
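/*
 * Conditional soft reset: inspect GRBM_STATUS/GRBM_STATUS2 for busy
 * blocks and, only if something is stuck, stop the RLC and CP and pulse
 * the corresponding GRBM_SOFT_RESET bits.
 */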
3439 static int gfx_v9_0_soft_reset(void *handle)
3440 {
3441         u32 grbm_soft_reset = 0;
3442         u32 tmp;
3443         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3444
3445         /* GRBM_STATUS */
3446         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3447         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3448                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3449                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3450                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3451                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3452                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3453                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3455                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3456                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3457         }
3458
3459         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3460                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3461                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3462         }
3463
3464         /* GRBM_STATUS2 */
3465         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3466         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3467                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3468                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3469
3470
3471         if (grbm_soft_reset) {
3472                 /* stop the rlc */
3473                 adev->gfx.rlc.funcs->stop(adev);
3474
3475                 /* Disable GFX parsing/prefetching */
3476                 gfx_v9_0_cp_gfx_enable(adev, false);
3477
3478                 /* Disable MEC parsing/prefetching */
3479                 gfx_v9_0_cp_compute_enable(adev, false);
3480
3481                 if (grbm_soft_reset) {
3482                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3483                         tmp |= grbm_soft_reset;
3484                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3485                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3486                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3487
3488                         udelay(50);
3489
3490                         tmp &= ~grbm_soft_reset;
3491                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3492                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3493                 }
3494
3495                 /* Wait a little for things to settle down */
3496                 udelay(50);
3497         }
3498         return 0;
3499 }
3500
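/*
 * Read the free-running 64-bit GPU clock: writing the capture register
 * latches the counter so the LSB/MSB halves can be read coherently; the
 * mutex serializes concurrent captures.
 */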
3501 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3502 {
3503         uint64_t clock;
3504
3505         mutex_lock(&adev->gfx.gpu_clock_mutex);
3506         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3507         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3508                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3509         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3510         return clock;
3511 }
3512
3513 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3514                                           uint32_t vmid,
3515                                           uint32_t gds_base, uint32_t gds_size,
3516                                           uint32_t gws_base, uint32_t gws_size,
3517                                           uint32_t oa_base, uint32_t oa_size)
3518 {
3519         struct amdgpu_device *adev = ring->adev;
3520
3521         /* GDS Base */
3522         gfx_v9_0_write_data_to_reg(ring, 0, false,
3523                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3524                                    gds_base);
3525
3526         /* GDS Size */
3527         gfx_v9_0_write_data_to_reg(ring, 0, false,
3528                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3529                                    gds_size);
3530
3531         /* GWS */
3532         gfx_v9_0_write_data_to_reg(ring, 0, false,
3533                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3534                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3535
3536         /* OA */
3537         gfx_v9_0_write_data_to_reg(ring, 0, false,
3538                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3539                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3540 }
3541
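/*
 * Raw GFX9 shader binaries used by the EDC GPR workaround below; per their
 * names they initialize the VGPR and SGPR register files so the ECC state
 * starts out clean.
 */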
3542 static const u32 vgpr_init_compute_shader[] =
3543 {
3544         0xb07c0000, 0xbe8000ff,
3545         0x000000f8, 0xbf110800,
3546         0x7e000280, 0x7e020280,
3547         0x7e040280, 0x7e060280,
3548         0x7e080280, 0x7e0a0280,
3549         0x7e0c0280, 0x7e0e0280,
3550         0x80808800, 0xbe803200,
3551         0xbf84fff5, 0xbf9c0000,
3552         0xd28c0001, 0x0001007f,
3553         0xd28d0001, 0x0002027e,
3554         0x10020288, 0xb8810904,
3555         0xb7814000, 0xd1196a01,
3556         0x00000301, 0xbe800087,
3557         0xbefc00c1, 0xd89c4000,
3558         0x00020201, 0xd89cc080,
3559         0x00040401, 0x320202ff,
3560         0x00000800, 0x80808100,
3561         0xbf84fff8, 0x7e020280,
3562         0xbf810000, 0x00000000,
3563 };
3564
3565 static const u32 sgpr_init_compute_shader[] =
3566 {
3567         0xb07c0000, 0xbe8000ff,
3568         0x0000005f, 0xbee50080,
3569         0xbe812c65, 0xbe822c65,
3570         0xbe832c65, 0xbe842c65,
3571         0xbe852c65, 0xb77c0005,
3572         0x80808500, 0xbf84fff8,
3573         0xbe800080, 0xbf810000,
3574 };
3575
3576 static const struct soc15_reg_entry vgpr_init_regs[] = {
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3586    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3587 };
3588
3589 static const struct soc15_reg_entry sgpr_init_regs[] = {
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3597    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3598    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3599    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3600 };
3601
3602 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3603    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3604    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3605    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3606    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3607    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3608    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3609    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3610    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3611    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3613    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3614    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3615    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3616    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3617    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3618    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3619    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3620    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3621    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3622    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3623    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3624    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3625    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3626    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3627    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3628    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3629    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3630    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3631    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3632    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3633    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3634 };
3635
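/*
 * EDC GPR workaround: when GFX RAS is enabled, build one IB that dispatches
 * the VGPR-init and SGPR-init shaders (SET_SH_REG state, DISPATCH_DIRECT,
 * CS partial flush), wait for it, then read back the SEC/DED error counters
 * across SEs and instances to clear them.
 */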
3636 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3637 {
3638         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3639         struct amdgpu_ib ib;
3640         struct dma_fence *f = NULL;
3641         int r, i, j;
3642         unsigned total_size, vgpr_offset, sgpr_offset;
3643         u64 gpu_addr;
3644
3645         /* only support when RAS is enabled */
3646         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3647                 return 0;
3648
3649         /* bail if the compute ring is not ready */
3650         if (!ring->sched.ready)
3651                 return 0;
3652
3653         total_size =
3654                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3655         total_size +=
3656                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3657         total_size = ALIGN(total_size, 256);
3658         vgpr_offset = total_size;
3659         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3660         sgpr_offset = total_size;
3661         total_size += sizeof(sgpr_init_compute_shader);
3662
3663         /* allocate an indirect buffer to put the commands in */
3664         memset(&ib, 0, sizeof(ib));
3665         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3666         if (r) {
3667                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3668                 return r;
3669         }
3670
3671         /* load the compute shaders */
3672         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3673                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3674
3675         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3676                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3677
3678         /* init the ib length to 0 */
3679         ib.length_dw = 0;
3680
3681         /* VGPR */
3682         /* write the register state for the compute dispatch */
3683         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3684                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3685                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3686                                                                 - PACKET3_SET_SH_REG_START;
3687                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3688         }
3689         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3690         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3691         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3692         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3693                                                         - PACKET3_SET_SH_REG_START;
3694         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3695         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3696
3697         /* write dispatch packet */
3698         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3699         ib.ptr[ib.length_dw++] = 128; /* x */
3700         ib.ptr[ib.length_dw++] = 1; /* y */
3701         ib.ptr[ib.length_dw++] = 1; /* z */
3702         ib.ptr[ib.length_dw++] =
3703                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3704
3705         /* write CS partial flush packet */
3706         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3707         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3708
3709         /* SGPR */
3710         /* write the register state for the compute dispatch */
3711         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3712                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3713                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3714                                                                 - PACKET3_SET_SH_REG_START;
3715                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3716         }
3717         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3718         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3719         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3720         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3721                                                         - PACKET3_SET_SH_REG_START;
3722         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3723         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3724
3725         /* write dispatch packet */
3726         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3727         ib.ptr[ib.length_dw++] = 128; /* x */
3728         ib.ptr[ib.length_dw++] = 1; /* y */
3729         ib.ptr[ib.length_dw++] = 1; /* z */
3730         ib.ptr[ib.length_dw++] =
3731                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3732
3733         /* write CS partial flush packet */
3734         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3735         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3736
3737         /* schedule the ib on the ring */
3738         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3739         if (r) {
3740                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3741                 goto fail;
3742         }
3743
3744         /* wait for the GPU to finish processing the IB */
3745         r = dma_fence_wait(f, false);
3746         if (r) {
3747                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3748                 goto fail;
3749         }
3750
3751         /* read back registers to clear the counters */
3752         mutex_lock(&adev->grbm_idx_mutex);
3753         for (j = 0; j < 16; j++) {
3754                 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3755                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3756                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3757                 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3758                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3759                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3760                 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3761                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3762                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3763                 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3764                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3765                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3766         }
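             /* 0xe0000000 restores SE/SH/instance broadcast writes in GRBM_GFX_INDEX */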
3767         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3768         mutex_unlock(&adev->grbm_idx_mutex);
3769
3770 fail:
3771         amdgpu_ib_free(adev, &ib, NULL);
3772         dma_fence_put(f);
3773
3774         return r;
3775 }
3776
3777 static int gfx_v9_0_early_init(void *handle)
3778 {
3779         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3780
3781         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3782         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3783         gfx_v9_0_set_ring_funcs(adev);
3784         gfx_v9_0_set_irq_funcs(adev);
3785         gfx_v9_0_set_gds_init(adev);
3786         gfx_v9_0_set_rlc_funcs(adev);
3787
3788         return 0;
3789 }
3790
3791 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3792                 struct amdgpu_iv_entry *entry);
3793
3794 static int gfx_v9_0_ecc_late_init(void *handle)
3795 {
3796         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3797         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3798         struct ras_ih_if ih_info = {
3799                 .cb = gfx_v9_0_process_ras_data_cb,
3800         };
3801         struct ras_fs_if fs_info = {
3802                 .sysfs_name = "gfx_err_count",
3803                 .debugfs_name = "gfx_err_inject",
3804         };
3805         struct ras_common_if ras_block = {
3806                 .block = AMDGPU_RAS_BLOCK__GFX,
3807                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3808                 .sub_block_index = 0,
3809                 .name = "gfx",
3810         };
3811         int r;
3812
3813         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3814                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3815                 return 0;
3816         }
3817
3818         /* requires IBs so do in late init after IB pool is initialized */
3819         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3820         if (r)
3821                 return r;
3822
3823         /* handle resume path. */
3824         if (*ras_if) {
3825                 /* re-send the ras TA enable cmd during resume,
3826                  * and be prepared to handle failure.
3827                  */
3828                 ih_info.head = **ras_if;
3829                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3830                 if (r) {
3831                         if (r == -EAGAIN) {
3832                                 /* request a gpu reset. will run again. */
3833                                 amdgpu_ras_request_reset_on_boot(adev,
3834                                                 AMDGPU_RAS_BLOCK__GFX);
3835                                 return 0;
3836                         }
3837                         /* failed to enable ras; clean up everything. */
3838                         goto irq;
3839                 }
3840                 /* enabled successfully; continue. */
3841                 goto resume;
3842         }
3843
3844         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3845         if (!*ras_if)
3846                 return -ENOMEM;
3847
3848         **ras_if = ras_block;
3849
3850         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3851         if (r) {
3852                 if (r == -EAGAIN) {
3853                         amdgpu_ras_request_reset_on_boot(adev,
3854                                         AMDGPU_RAS_BLOCK__GFX);
3855                         r = 0;
3856                 }
3857                 goto feature;
3858         }
3859
3860         ih_info.head = **ras_if;
3861         fs_info.head = **ras_if;
3862
3863         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3864         if (r)
3865                 goto interrupt;
3866
3867         r = amdgpu_ras_debugfs_create(adev, &fs_info);
3868         if (r)
3869                 goto debugfs;
3870
3871         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3872         if (r)
3873                 goto sysfs;
3874 resume:
3875         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3876         if (r)
3877                 goto irq;
3878
3879         return 0;
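             /*
              * error unwind: each label below is named after the step that
              * failed; jumping to it tears down everything set up before
              * that step, in reverse order.
              */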
3880 irq:
3881         amdgpu_ras_sysfs_remove(adev, *ras_if);
3882 sysfs:
3883         amdgpu_ras_debugfs_remove(adev, *ras_if);
3884 debugfs:
3885         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3886 interrupt:
3887         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3888 feature:
3889         kfree(*ras_if);
3890         *ras_if = NULL;
3891         return r;
3892 }
3893
3894 static int gfx_v9_0_late_init(void *handle)
3895 {
3896         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3897         int r;
3898
3899         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3900         if (r)
3901                 return r;
3902
3903         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3904         if (r)
3905                 return r;
3906
3907         r = gfx_v9_0_ecc_late_init(handle);
3908         if (r)
3909                 return r;
3910
3911         return 0;
3912 }
3913
3914 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3915 {
3916         uint32_t rlc_setting;
3917
3918         /* if RLC is not enabled, do nothing */
3919         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3920         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3921                 return false;
3922
3923         return true;
3924 }
3925
3926 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3927 {
3928         uint32_t data;
3929         unsigned i;
3930
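             /* CMD=1 with MESSAGE=1 asks the RLC to enter safe mode; the RLC
              * clears the CMD bit once the request is processed (polled below).
              */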
3931         data = RLC_SAFE_MODE__CMD_MASK;
3932         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3933         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3934
3935         /* wait for RLC_SAFE_MODE */
3936         for (i = 0; i < adev->usec_timeout; i++) {
3937                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3938                         break;
3939                 udelay(1);
3940         }
3941 }
3942
3943 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3944 {
3945         uint32_t data;
3946
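             /* CMD=1 with MESSAGE=0 asks the RLC to leave safe mode again */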
3947         data = RLC_SAFE_MODE__CMD_MASK;
3948         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3949 }
3950
3951 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3952                                                 bool enable)
3953 {
3954         amdgpu_gfx_rlc_enter_safe_mode(adev);
3955
3956         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3957                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3958                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3959                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3960         } else {
3961                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3962                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3963         }
3964
3965         amdgpu_gfx_rlc_exit_safe_mode(adev);
3966 }
3967
3968 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3969                                                 bool enable)
3970 {
3971         /* TODO: double check if we need to perform under safe mode */
3972         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3973
3974         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3975                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3976         else
3977                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3978
3979         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3980                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3981         else
3982                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3983
3984         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3985 }
3986
3987 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3988                                                       bool enable)
3989 {
3990         uint32_t data, def;
3991
3992         amdgpu_gfx_rlc_enter_safe_mode(adev);
3993
3994         /* It is disabled by HW by default */
3995         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3996                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3997                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3998
3999                 if (adev->asic_type != CHIP_VEGA12)
4000                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4001
4002                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4003                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4004                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4005
4006                 /* only for Vega10 & Raven1 */
4007                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4008
4009                 if (def != data)
4010                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4011
4012                 /* MGLS is a global flag to control all MGLS in GFX */
4013                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4014                         /* 2 - RLC memory Light sleep */
4015                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4016                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4017                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4018                                 if (def != data)
4019                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4020                         }
4021                         /* 3 - CP memory Light sleep */
4022                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4023                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4024                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4025                                 if (def != data)
4026                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4027                         }
4028                 }
4029         } else {
4030                 /* 1 - MGCG_OVERRIDE */
4031                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4032
4033                 if (adev->asic_type != CHIP_VEGA12)
4034                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4035
4036                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4037                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4038                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4039                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4040
4041                 if (def != data)
4042                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4043
4044                 /* 2 - disable MGLS in RLC */
4045                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4046                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4047                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4048                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4049                 }
4050
4051                 /* 3 - disable MGLS in CP */
4052                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4053                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4054                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4055                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4056                 }
4057         }
4058
4059         amdgpu_gfx_rlc_exit_safe_mode(adev);
4060 }
4061
4062 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4063                                            bool enable)
4064 {
4065         uint32_t data, def;
4066
4067         amdgpu_gfx_rlc_enter_safe_mode(adev);
4068
4069         /* Enable 3D CGCG/CGLS */
4070         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4071                 /* write cmd to clear cgcg/cgls ov */
4072                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4073                 /* unset CGCG override */
4074                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4075                 /* update CGCG and CGLS override bits */
4076                 if (def != data)
4077                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4078
4079                 /* enable 3Dcgcg FSM(0x0000363f) */
4080                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4081
4082                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4083                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4084                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4085                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4086                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4087                 if (def != data)
4088                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4089
4090                 /* set IDLE_POLL_COUNT(0x00900100) */
4091                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4092                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4093                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4094                 if (def != data)
4095                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4096         } else {
4097                 /* Disable CGCG/CGLS */
4098                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4099                 /* disable cgcg, cgls should be disabled */
4100                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4101                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4102                 /* disable cgcg and cgls in FSM */
4103                 if (def != data)
4104                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4105         }
4106
4107         amdgpu_gfx_rlc_exit_safe_mode(adev);
4108 }
4109
4110 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4111                                                       bool enable)
4112 {
4113         uint32_t def, data;
4114
4115         amdgpu_gfx_rlc_enter_safe_mode(adev);
4116
4117         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4118                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4119                 /* unset CGCG override */
4120                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4121                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4122                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4123                 else
4124                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4125                 /* update CGCG and CGLS override bits */
4126                 if (def != data)
4127                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4128
4129                 /* enable cgcg FSM(0x0000363F) */
4130                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4131
4132                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4133                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4134                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4135                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4136                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4137                 if (def != data)
4138                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4139
4140                 /* set IDLE_POLL_COUNT(0x00900100) */
4141                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4142                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4143                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4144                 if (def != data)
4145                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4146         } else {
4147                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4148                 /* reset CGCG/CGLS bits */
4149                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4150                 /* disable cgcg and cgls in FSM */
4151                 if (def != data)
4152                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4153         }
4154
4155         amdgpu_gfx_rlc_exit_safe_mode(adev);
4156 }
4157
4158 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4159                                             bool enable)
4160 {
4161         if (enable) {
4162                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4163                  * ===  MGCG + MGLS ===
4164                  */
4165                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4166                 /* ===  CGCG /CGLS for GFX 3D Only === */
4167                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4168                 /* ===  CGCG + CGLS === */
4169                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4170         } else {
4171                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4172                  * ===  CGCG + CGLS ===
4173                  */
4174                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4175                 /* ===  CGCG /CGLS for GFX 3D Only === */
4176                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4177                 /* ===  MGCG + MGLS === */
4178                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4179         }
4180         return 0;
4181 }
4182
4183 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4184         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4185         .set_safe_mode = gfx_v9_0_set_safe_mode,
4186         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4187         .init = gfx_v9_0_rlc_init,
4188         .get_csb_size = gfx_v9_0_get_csb_size,
4189         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4190         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4191         .resume = gfx_v9_0_rlc_resume,
4192         .stop = gfx_v9_0_rlc_stop,
4193         .reset = gfx_v9_0_rlc_reset,
4194         .start = gfx_v9_0_rlc_start
4195 };
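/*
 * A rough sketch (not verbatim) of how the common helpers drive the
 * safe-mode hooks above; see amdgpu_gfx_rlc_enter_safe_mode() in
 * amdgpu_rlc.c for the actual implementation:
 *
 *	if (adev->gfx.rlc.funcs->is_rlc_enabled(adev))
 *		adev->gfx.rlc.funcs->set_safe_mode(adev);
 */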
4196
4197 static int gfx_v9_0_set_powergating_state(void *handle,
4198                                           enum amd_powergating_state state)
4199 {
4200         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4201         bool enable = (state == AMD_PG_STATE_GATE);
4202
4203         switch (adev->asic_type) {
4204         case CHIP_RAVEN:
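                     /* block GFXOFF (and flush the delayed-disable work) while
                      * ungating; it is re-allowed below once gating is set up.
                      */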
4205                 if (!enable) {
4206                         amdgpu_gfx_off_ctrl(adev, false);
4207                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4208                 }
4209                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4210                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4211                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4212                 } else {
4213                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4214                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4215                 }
4216
4217                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4218                         gfx_v9_0_enable_cp_power_gating(adev, true);
4219                 else
4220                         gfx_v9_0_enable_cp_power_gating(adev, false);
4221
4222                 /* update gfx cgpg state */
4223                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4224
4225                 /* update mgcg state */
4226                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4227
4228                 if (enable)
4229                         amdgpu_gfx_off_ctrl(adev, true);
4230                 break;
4231         case CHIP_VEGA12:
4232                 if (!enable) {
4233                         amdgpu_gfx_off_ctrl(adev, false);
4234                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4235                 } else {
4236                         amdgpu_gfx_off_ctrl(adev, true);
4237                 }
4238                 break;
4239         default:
4240                 break;
4241         }
4242
4243         return 0;
4244 }
4245
4246 static int gfx_v9_0_set_clockgating_state(void *handle,
4247                                           enum amd_clockgating_state state)
4248 {
4249         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4250
4251         if (amdgpu_sriov_vf(adev))
4252                 return 0;
4253
4254         switch (adev->asic_type) {
4255         case CHIP_VEGA10:
4256         case CHIP_VEGA12:
4257         case CHIP_VEGA20:
4258         case CHIP_RAVEN:
4259                 gfx_v9_0_update_gfx_clock_gating(adev,
4260                                                  state == AMD_CG_STATE_GATE);
4261                 break;
4262         default:
4263                 break;
4264         }
4265         return 0;
4266 }
4267
4268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4269 {
4270         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4271         int data;
4272
4273         if (amdgpu_sriov_vf(adev))
4274                 *flags = 0;
4275
4276         /* AMD_CG_SUPPORT_GFX_MGCG */
4277         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4278         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4279                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4280
4281         /* AMD_CG_SUPPORT_GFX_CGCG */
4282         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4283         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4284                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4285
4286         /* AMD_CG_SUPPORT_GFX_CGLS */
4287         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4288                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4289
4290         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4291         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4292         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4293                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4294
4295         /* AMD_CG_SUPPORT_GFX_CP_LS */
4296         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4297         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4298                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4299
4300         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4301         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4302         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4303                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4304
4305         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4306         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4307                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4308 }
4309
4310 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4311 {
4312         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4313 }
4314
4315 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4316 {
4317         struct amdgpu_device *adev = ring->adev;
4318         u64 wptr;
4319
4320         /* XXX check if swapping is necessary on BE */
4321         if (ring->use_doorbell) {
4322                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4323         } else {
4324                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4325                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4326         }
4327
4328         return wptr;
4329 }
4330
4331 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4332 {
4333         struct amdgpu_device *adev = ring->adev;
4334
4335         if (ring->use_doorbell) {
4336                 /* XXX check if swapping is necessary on BE */
4337                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4338                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4339         } else {
4340                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4341                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4342         }
4343 }
4344
4345 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4346 {
4347         struct amdgpu_device *adev = ring->adev;
4348         u32 ref_and_mask, reg_mem_engine;
4349         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4350
4351         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4352                 switch (ring->me) {
4353                 case 1:
4354                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4355                         break;
4356                 case 2:
4357                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4358                         break;
4359                 default:
4360                         return;
4361                 }
4362                 reg_mem_engine = 0;
4363         } else {
4364                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4365                 reg_mem_engine = 1; /* pfp */
4366         }
4367
4368         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4369                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4370                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4371                               ref_and_mask, ref_and_mask, 0x20);
4372 }
4373
4374 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4375                                         struct amdgpu_job *job,
4376                                         struct amdgpu_ib *ib,
4377                                         uint32_t flags)
4378 {
4379         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4380         u32 header, control = 0;
4381
4382         if (ib->flags & AMDGPU_IB_FLAG_CE)
4383                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4384         else
4385                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4386
4387         control |= ib->length_dw | (vmid << 24);
4388
4389         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4390                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4391
4392                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4393                         gfx_v9_0_ring_emit_de_meta(ring);
4394         }
4395
4396         amdgpu_ring_write(ring, header);
4397         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4398         amdgpu_ring_write(ring,
4399 #ifdef __BIG_ENDIAN
4400                 (2 << 0) |
4401 #endif
4402                 lower_32_bits(ib->gpu_addr));
4403         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4404         amdgpu_ring_write(ring, control);
4405 }
4406
4407 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4408                                           struct amdgpu_job *job,
4409                                           struct amdgpu_ib *ib,
4410                                           uint32_t flags)
4411 {
4412         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4413         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4414
4415         /* Currently there is a high likelihood of a wave ID mismatch
4416          * between ME and GDS, leading to a hw deadlock, because ME
4417          * generates different wave IDs than the GDS expects. This happens
4418          * randomly when at least 5 compute pipes use GDS ordered append.
4419          * The wave IDs generated by ME are also wrong after suspend/resume.
4420          * Those are probably bugs somewhere else in the kernel driver.
4421          *
4422          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4423          * GDS to 0 for this ring (me/pipe).
4424          */
4425         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4426                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4427                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4428                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4429         }
4430
4431         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4432         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4433         amdgpu_ring_write(ring,
4434 #ifdef __BIG_ENDIAN
4435                                 (2 << 0) |
4436 #endif
4437                                 lower_32_bits(ib->gpu_addr));
4438         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4439         amdgpu_ring_write(ring, control);
4440 }
4441
4442 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4443                                      u64 seq, unsigned flags)
4444 {
4445         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4446         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4447         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4448
4449         /* RELEASE_MEM - flush caches, send int */
4450         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4451         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4452                                                EOP_TC_NC_ACTION_EN) :
4453                                               (EOP_TCL1_ACTION_EN |
4454                                                EOP_TC_ACTION_EN |
4455                                                EOP_TC_WB_ACTION_EN |
4456                                                EOP_TC_MD_ACTION_EN)) |
4457                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4458                                  EVENT_INDEX(5)));
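             /* DATA_SEL: 1 = write the low 32 bits of seq, 2 = write all 64
              * bits; INT_SEL: 2 = raise the interrupt after the write confirms.
              */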
4459         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4460
4461         /*
4462          * the address must be Qword aligned for a 64bit write and Dword
4463          * aligned if only the low 32 bits are sent (data high is discarded)
4464          */
4465         if (write64bit)
4466                 BUG_ON(addr & 0x7);
4467         else
4468                 BUG_ON(addr & 0x3);
4469         amdgpu_ring_write(ring, lower_32_bits(addr));
4470         amdgpu_ring_write(ring, upper_32_bits(addr));
4471         amdgpu_ring_write(ring, lower_32_bits(seq));
4472         amdgpu_ring_write(ring, upper_32_bits(seq));
4473         amdgpu_ring_write(ring, 0);
4474 }
4475
4476 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4477 {
4478         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4479         uint32_t seq = ring->fence_drv.sync_seq;
4480         uint64_t addr = ring->fence_drv.gpu_addr;
4481
4482         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4483                               lower_32_bits(addr), upper_32_bits(addr),
4484                               seq, 0xffffffff, 4);
4485 }
4486
4487 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4488                                         unsigned vmid, uint64_t pd_addr)
4489 {
4490         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4491
4492         /* compute doesn't have PFP */
4493         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4494                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4495                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4496                 amdgpu_ring_write(ring, 0x0);
4497         }
4498 }
4499
4500 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4501 {
4502         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4503 }
4504
4505 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4506 {
4507         u64 wptr;
4508
4509         /* XXX check if swapping is necessary on BE */
4510         if (ring->use_doorbell)
4511                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4512         else
4513                 BUG();
4514         return wptr;
4515 }
4516
4517 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4518                                            bool acquire)
4519 {
4520         struct amdgpu_device *adev = ring->adev;
4521         int pipe_num, tmp, reg;
4522         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4523
4524         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4525
4526         /* first me only has 2 entries, GFX and HP3D */
4527         if (ring->me > 0)
4528                 pipe_num -= 2;
4529
4530         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4531         tmp = RREG32(reg);
4532         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4533         WREG32(reg, tmp);
4534 }
4535
4536 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4537                                             struct amdgpu_ring *ring,
4538                                             bool acquire)
4539 {
4540         int i, pipe;
4541         bool reserve;
4542         struct amdgpu_ring *iring;
4543
4544         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4545         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4546         if (acquire)
4547                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4548         else
4549                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4550
4551         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4552                 /* Clear all reservations - everyone reacquires all resources */
4553                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4554                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4555                                                        true);
4556
4557                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4558                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4559                                                        true);
4560         } else {
4561                 /* Lower all pipes without a current reservation */
4562                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4563                         iring = &adev->gfx.gfx_ring[i];
4564                         pipe = amdgpu_gfx_queue_to_bit(adev,
4565                                                        iring->me,
4566                                                        iring->pipe,
4567                                                        0);
4568                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4569                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4570                 }
4571
4572                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4573                         iring = &adev->gfx.compute_ring[i];
4574                         pipe = amdgpu_gfx_queue_to_bit(adev,
4575                                                        iring->me,
4576                                                        iring->pipe,
4577                                                        0);
4578                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4579                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4580                 }
4581         }
4582
4583         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4584 }
4585
4586 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4587                                       struct amdgpu_ring *ring,
4588                                       bool acquire)
4589 {
4590         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4591         uint32_t queue_priority = acquire ? 0xf : 0x0;
4592
4593         mutex_lock(&adev->srbm_mutex);
4594         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4595
4596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4597         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4598
4599         soc15_grbm_select(adev, 0, 0, 0, 0);
4600         mutex_unlock(&adev->srbm_mutex);
4601 }
4602
4603 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4604                                                enum drm_sched_priority priority)
4605 {
4606         struct amdgpu_device *adev = ring->adev;
4607         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4608
4609         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4610                 return;
4611
4612         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4613         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4614 }
4615
4616 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4617 {
4618         struct amdgpu_device *adev = ring->adev;
4619
4620         /* XXX check if swapping is necessary on BE */
4621         if (ring->use_doorbell) {
4622                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4623                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4624         } else {
4625                 BUG(); /* only DOORBELL method supported on gfx9 now */
4626         }
4627 }
4628
4629 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4630                                          u64 seq, unsigned int flags)
4631 {
4632         struct amdgpu_device *adev = ring->adev;
4633
4634         /* we only allocate 32bit for each seq wb address */
4635         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4636
4637         /* write fence seq to the "addr" */
4638         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4639         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4640                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4641         amdgpu_ring_write(ring, lower_32_bits(addr));
4642         amdgpu_ring_write(ring, upper_32_bits(addr));
4643         amdgpu_ring_write(ring, lower_32_bits(seq));
4644
4645         if (flags & AMDGPU_FENCE_FLAG_INT) {
4646                 /* set register to trigger INT */
4647                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4648                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4649                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4650                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4651                 amdgpu_ring_write(ring, 0);
4652                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4653         }
4654 }
4655
4656 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4657 {
4658         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4659         amdgpu_ring_write(ring, 0);
4660 }
4661
4662 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4663 {
4664         struct v9_ce_ib_state ce_payload = {0};
4665         uint64_t csa_addr;
4666         int cnt;
4667
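             /* WRITE_DATA body = 1 control dword + 2 address dwords + payload;
              * the PACKET3 count field encodes the body length minus one.
              */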
4668         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4669         csa_addr = amdgpu_csa_vaddr(ring->adev);
4670
4671         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4672         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4673                                  WRITE_DATA_DST_SEL(8) |
4674                                  WR_CONFIRM) |
4675                                  WRITE_DATA_CACHE_POLICY(0));
4676         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4677         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4678         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4679 }
4680
4681 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4682 {
4683         struct v9_de_ib_state de_payload = {0};
4684         uint64_t csa_addr, gds_addr;
4685         int cnt;
4686
4687         csa_addr = amdgpu_csa_vaddr(ring->adev);
4688         gds_addr = csa_addr + 4096;
4689         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4690         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4691
4692         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4693         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4694         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4695                                  WRITE_DATA_DST_SEL(8) |
4696                                  WR_CONFIRM) |
4697                                  WRITE_DATA_CACHE_POLICY(0));
4698         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4699         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4700         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4701 }
4702
4703 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4704 {
4705         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4706         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4707 }
4708
4709 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4710 {
4711         uint32_t dw2 = 0;
4712
4713         if (amdgpu_sriov_vf(ring->adev))
4714                 gfx_v9_0_ring_emit_ce_meta(ring);
4715
4716         gfx_v9_0_ring_emit_tmz(ring, true);
4717
4718         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4719         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4720                 /* set load_global_config & load_global_uconfig */
4721                 dw2 |= 0x8001;
4722                 /* set load_cs_sh_regs */
4723                 dw2 |= 0x01000000;
4724                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4725                 dw2 |= 0x10002;
4726
4727                 /* set load_ce_ram if a preamble is present */
4728                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4729                         dw2 |= 0x10000000;
4730         } else {
4731                 /* still load_ce_ram if this is the first time a preamble
4732                  * is presented, even though no context switch happens.
4733                  */
4734                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4735                         dw2 |= 0x10000000;
4736         }
4737
4738         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4739         amdgpu_ring_write(ring, dw2);
4740         amdgpu_ring_write(ring, 0);
4741 }
4742
4743 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4744 {
4745         unsigned ret;
4746         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4747         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4748         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4749         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
4750         ret = ring->wptr & ring->buf_mask;
4751         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4752         return ret;
4753 }
4754
4755 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4756 {
4757         unsigned cur;
4758         BUG_ON(offset > ring->buf_mask);
4759         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4760
4761         cur = (ring->wptr & ring->buf_mask) - 1;
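             /* patch the COND_EXEC dword count; the else branch handles the
              * case where the write pointer has wrapped past the packet.
              */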
4762         if (likely(cur > offset))
4763                 ring->ring[offset] = cur - offset;
4764         else
4765                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4766 }
4767
4768 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4769 {
4770         struct amdgpu_device *adev = ring->adev;
4771
4772         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4773         amdgpu_ring_write(ring, 0 |     /* src: register*/
4774                                 (5 << 8) |      /* dst: memory */
4775                                 (1 << 20));     /* write confirm */
4776         amdgpu_ring_write(ring, reg);
4777         amdgpu_ring_write(ring, 0);
4778         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4779                                 adev->virt.reg_val_offs * 4));
4780         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4781                                 adev->virt.reg_val_offs * 4));
4782 }
4783
4784 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4785                                     uint32_t val)
4786 {
4787         uint32_t cmd = 0;
4788
4789         switch (ring->funcs->type) {
4790         case AMDGPU_RING_TYPE_GFX:
4791                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4792                 break;
4793         case AMDGPU_RING_TYPE_KIQ:
4794                 cmd = (1 << 16); /* no inc addr */
4795                 break;
4796         default:
4797                 cmd = WR_CONFIRM;
4798                 break;
4799         }
4800         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4801         amdgpu_ring_write(ring, cmd);
4802         amdgpu_ring_write(ring, reg);
4803         amdgpu_ring_write(ring, 0);
4804         amdgpu_ring_write(ring, val);
4805 }
4806
4807 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4808                                         uint32_t val, uint32_t mask)
4809 {
4810         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4811 }
4812
4813 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4814                                                   uint32_t reg0, uint32_t reg1,
4815                                                   uint32_t ref, uint32_t mask)
4816 {
4817         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4818         struct amdgpu_device *adev = ring->adev;
4819         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4820                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4821
4822         if (fw_version_ok)
4823                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4824                                       ref, mask, 0x20);
4825         else
4826                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4827                                                            ref, mask);
4828 }
4829
4830 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4831 {
4832         struct amdgpu_device *adev = ring->adev;
4833         uint32_t value = 0;
4834
4835         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4836         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4837         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4838         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4839         WREG32(mmSQ_CMD, value);
4840 }
4841
4842 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4843                                                  enum amdgpu_interrupt_state state)
4844 {
4845         switch (state) {
4846         case AMDGPU_IRQ_STATE_DISABLE:
4847         case AMDGPU_IRQ_STATE_ENABLE:
4848                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4849                                TIME_STAMP_INT_ENABLE,
4850                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4851                 break;
4852         default:
4853                 break;
4854         }
4855 }
4856
4857 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4858                                                      int me, int pipe,
4859                                                      enum amdgpu_interrupt_state state)
4860 {
4861         u32 mec_int_cntl, mec_int_cntl_reg;
4862
4863         /*
4864          * amdgpu controls only the first MEC. That's why this function only
4865          * handles the setting of interrupts for this specific MEC. All other
4866          * pipes' interrupts are set by amdkfd.
4867          */
4868
4869         if (me == 1) {
4870                 switch (pipe) {
4871                 case 0:
4872                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4873                         break;
4874                 case 1:
4875                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4876                         break;
4877                 case 2:
4878                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4879                         break;
4880                 case 3:
4881                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4882                         break;
4883                 default:
4884                         DRM_DEBUG("invalid pipe %d\n", pipe);
4885                         return;
4886                 }
4887         } else {
4888                 DRM_DEBUG("invalid me %d\n", me);
4889                 return;
4890         }
4891
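             /* the INT_CNTL field layout is identical for all ME1 pipes, so
              * the PIPE0 definitions are reused for every pipe below.
              */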
4892         switch (state) {
4893         case AMDGPU_IRQ_STATE_DISABLE:
4894                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4895                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4896                                              TIME_STAMP_INT_ENABLE, 0);
4897                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4898                 break;
4899         case AMDGPU_IRQ_STATE_ENABLE:
4900                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4901                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4902                                              TIME_STAMP_INT_ENABLE, 1);
4903                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4904                 break;
4905         default:
4906                 break;
4907         }
4908 }
4909
4910 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4911                                              struct amdgpu_irq_src *source,
4912                                              unsigned type,
4913                                              enum amdgpu_interrupt_state state)
4914 {
4915         switch (state) {
4916         case AMDGPU_IRQ_STATE_DISABLE:
4917         case AMDGPU_IRQ_STATE_ENABLE:
4918                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4919                                PRIV_REG_INT_ENABLE,
4920                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4921                 break;
4922         default:
4923                 break;
4924         }
4925
4926         return 0;
4927 }
4928
4929 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4930                                               struct amdgpu_irq_src *source,
4931                                               unsigned type,
4932                                               enum amdgpu_interrupt_state state)
4933 {
4934         switch (state) {
4935         case AMDGPU_IRQ_STATE_DISABLE:
4936         case AMDGPU_IRQ_STATE_ENABLE:
4937                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4938                                PRIV_INSTR_INT_ENABLE,
4939                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
4940         default:
4941                 break;
4942         }
4943
4944         return 0;
4945 }
4946
4947 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4948         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4949                         CP_ECC_ERROR_INT_ENABLE, 1)
4950
4951 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4952         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4953                         CP_ECC_ERROR_INT_ENABLE, 0)
4954
4955 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4956                                               struct amdgpu_irq_src *source,
4957                                               unsigned type,
4958                                               enum amdgpu_interrupt_state state)
4959 {
4960         switch (state) {
4961         case AMDGPU_IRQ_STATE_DISABLE:
4962                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4963                                 CP_ECC_ERROR_INT_ENABLE, 0);
4964                 DISABLE_ECC_ON_ME_PIPE(1, 0);
4965                 DISABLE_ECC_ON_ME_PIPE(1, 1);
4966                 DISABLE_ECC_ON_ME_PIPE(1, 2);
4967                 DISABLE_ECC_ON_ME_PIPE(1, 3);
4968                 break;
4969
4970         case AMDGPU_IRQ_STATE_ENABLE:
4971                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4972                                 CP_ECC_ERROR_INT_ENABLE, 1);
4973                 ENABLE_ECC_ON_ME_PIPE(1, 0);
4974                 ENABLE_ECC_ON_ME_PIPE(1, 1);
4975                 ENABLE_ECC_ON_ME_PIPE(1, 2);
4976                 ENABLE_ECC_ON_ME_PIPE(1, 3);
4977                 break;
4978         default:
4979                 break;
4980         }
4981
4982         return 0;
4983 }
4984
4985
4986 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4987                                             struct amdgpu_irq_src *src,
4988                                             unsigned type,
4989                                             enum amdgpu_interrupt_state state)
4990 {
4991         switch (type) {
4992         case AMDGPU_CP_IRQ_GFX_EOP:
4993                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4994                 break;
4995         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4996                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4997                 break;
4998         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4999                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5000                 break;
5001         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5002                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5003                 break;
5004         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5005                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5006                 break;
5007         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5008                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5009                 break;
5010         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5011                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5012                 break;
5013         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5014                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5015                 break;
5016         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5017                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5018                 break;
5019         default:
5020                 break;
5021         }
5022         return 0;
5023 }
5024
5025 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5026                             struct amdgpu_irq_src *source,
5027                             struct amdgpu_iv_entry *entry)
5028 {
5029         int i;
5030         u8 me_id, pipe_id, queue_id;
5031         struct amdgpu_ring *ring;
5032
5033         DRM_DEBUG("IH: CP EOP\n");
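             /* entry->ring_id packs the interrupt source as
              * bits [1:0] = pipe, bits [3:2] = micro engine (ME),
              * bits [6:4] = queue, matching the decode below.
              */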
5034         me_id = (entry->ring_id & 0x0c) >> 2;
5035         pipe_id = (entry->ring_id & 0x03) >> 0;
5036         queue_id = (entry->ring_id & 0x70) >> 4;
5037
5038         switch (me_id) {
5039         case 0:
5040                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5041                 break;
5042         case 1:
5043         case 2:
5044                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045                         ring = &adev->gfx.compute_ring[i];
5046                         /* Per-queue interrupt is supported for MEC starting from VI.
5047                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5048                          */
5049                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5050                                 amdgpu_fence_process(ring);
5051                 }
5052                 break;
5053         }
5054         return 0;
5055 }
5056
5057 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5058                            struct amdgpu_iv_entry *entry)
5059 {
5060         u8 me_id, pipe_id, queue_id;
5061         struct amdgpu_ring *ring;
5062         int i;
5063
5064         me_id = (entry->ring_id & 0x0c) >> 2;
5065         pipe_id = (entry->ring_id & 0x03) >> 0;
5066         queue_id = (entry->ring_id & 0x70) >> 4;
5067
5068         switch (me_id) {
5069         case 0:
5070                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5071                 break;
5072         case 1:
5073         case 2:
5074                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5075                         ring = &adev->gfx.compute_ring[i];
5076                         if (ring->me == me_id && ring->pipe == pipe_id &&
5077                             ring->queue == queue_id)
5078                                 drm_sched_fault(&ring->sched);
5079                 }
5080                 break;
5081         }
5082 }
5083
5084 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5085                                  struct amdgpu_irq_src *source,
5086                                  struct amdgpu_iv_entry *entry)
5087 {
5088         DRM_ERROR("Illegal register access in command stream\n");
5089         gfx_v9_0_fault(adev, entry);
5090         return 0;
5091 }
5092
5093 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5094                                   struct amdgpu_irq_src *source,
5095                                   struct amdgpu_iv_entry *entry)
5096 {
5097         DRM_ERROR("Illegal instruction in command stream\n");
5098         gfx_v9_0_fault(adev, entry);
5099         return 0;
5100 }
5101
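/* RAS uncorrectable-error (UE) path: the corrupted data cannot be
 * recovered in place, so flag the SRAM ECC error to KFD and schedule a
 * full GPU reset instead.
 */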
5102 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5103                 struct amdgpu_iv_entry *entry)
5104 {
5105         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5106         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5107         amdgpu_ras_reset_gpu(adev, 0);
5108         return AMDGPU_RAS_UE;
5109 }
5110
5111 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5112                                   struct amdgpu_irq_src *source,
5113                                   struct amdgpu_iv_entry *entry)
5114 {
5115         struct ras_common_if *ras_if = adev->gfx.ras_if;
5116         struct ras_dispatch_if ih_data = {
5117                 .entry = entry,
5118         };
5119
5120         if (!ras_if)
5121                 return 0;
5122
5123         ih_data.head = *ras_if;
5124
5125         DRM_ERROR("CP ECC ERROR IRQ\n");
5126         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5127         return 0;
5128 }
5129
5130 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5131         .name = "gfx_v9_0",
5132         .early_init = gfx_v9_0_early_init,
5133         .late_init = gfx_v9_0_late_init,
5134         .sw_init = gfx_v9_0_sw_init,
5135         .sw_fini = gfx_v9_0_sw_fini,
5136         .hw_init = gfx_v9_0_hw_init,
5137         .hw_fini = gfx_v9_0_hw_fini,
5138         .suspend = gfx_v9_0_suspend,
5139         .resume = gfx_v9_0_resume,
5140         .is_idle = gfx_v9_0_is_idle,
5141         .wait_for_idle = gfx_v9_0_wait_for_idle,
5142         .soft_reset = gfx_v9_0_soft_reset,
5143         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5144         .set_powergating_state = gfx_v9_0_set_powergating_state,
5145         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5146 };
5147
5148 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5149         .type = AMDGPU_RING_TYPE_GFX,
5150         .align_mask = 0xff,
5151         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5152         .support_64bit_ptrs = true,
5153         .vmhub = AMDGPU_GFXHUB,
5154         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5155         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5156         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5157         .emit_frame_size = /* at most 242 dwords in total if 16 IBs */
5158                 5 +  /* COND_EXEC */
5159                 7 +  /* PIPELINE_SYNC */
5160                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5161                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5162                 2 +  /* VM_FLUSH */
5163                 8 +  /* FENCE for VM_FLUSH */
5164                 20 + /* GDS switch */
5165                 4 +  /* double SWITCH_BUFFER:
5166                         the first COND_EXEC jumps to the place just
5167                         prior to this double SWITCH_BUFFER */
5168                 5 +  /* COND_EXEC */
5169                 7 +  /* HDP_flush */
5170                 4 +  /* VGT_flush */
5171                 14 + /* CE_META */
5172                 31 + /* DE_META */
5173                 3 +  /* CNTX_CTRL */
5174                 5 +  /* HDP_INVL */
5175                 8 + 8 + /* FENCE x2 */
5176                 2,   /* SWITCH_BUFFER */
5177         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5178         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5179         .emit_fence = gfx_v9_0_ring_emit_fence,
5180         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5181         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5182         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5183         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5184         .test_ring = gfx_v9_0_ring_test_ring,
5185         .test_ib = gfx_v9_0_ring_test_ib,
5186         .insert_nop = amdgpu_ring_insert_nop,
5187         .pad_ib = amdgpu_ring_generic_pad_ib,
5188         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5189         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5190         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5191         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5192         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5193         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5194         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5195         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5196         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5197 };
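/* Note on the table above: the emit_* sizes are in ring dwords, and
 * emit_frame_size tallies the worst case a frame can emit outside of its
 * IBs (each term's comment names the packet it accounts for), presumably
 * so the ring code can reserve enough space that a submission never wraps
 * mid-packet.
 */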
5198
5199 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5200         .type = AMDGPU_RING_TYPE_COMPUTE,
5201         .align_mask = 0xff,
5202         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5203         .support_64bit_ptrs = true,
5204         .vmhub = AMDGPU_GFXHUB,
5205         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5206         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5207         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5208         .emit_frame_size =
5209                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5210                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5211                 5 + /* hdp invalidate */
5212                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5213                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5214                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5215                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5216                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5217         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5218         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5219         .emit_fence = gfx_v9_0_ring_emit_fence,
5220         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5221         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5222         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5223         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5224         .test_ring = gfx_v9_0_ring_test_ring,
5225         .test_ib = gfx_v9_0_ring_test_ib,
5226         .insert_nop = amdgpu_ring_insert_nop,
5227         .pad_ib = amdgpu_ring_generic_pad_ib,
5228         .set_priority = gfx_v9_0_ring_set_priority_compute,
5229         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5230         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5231         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5232 };
5233
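/* The KIQ (kernel interface queue) never runs user IBs; the driver uses
 * it internally, e.g. for indirect register access (emit_rreg/emit_wreg)
 * and compute queue management, which is why the table below has no
 * IB or VM-flush hooks.
 */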
5234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5235         .type = AMDGPU_RING_TYPE_KIQ,
5236         .align_mask = 0xff,
5237         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5238         .support_64bit_ptrs = true,
5239         .vmhub = AMDGPU_GFXHUB,
5240         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5241         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5242         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5243         .emit_frame_size =
5244                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5245                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5246                 5 + /* hdp invalidate */
5247                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5248                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5249                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5250                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5251                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5252         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5253         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5254         .test_ring = gfx_v9_0_ring_test_ring,
5255         .insert_nop = amdgpu_ring_insert_nop,
5256         .pad_ib = amdgpu_ring_generic_pad_ib,
5257         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5258         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5259         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5260         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5261 };
5262
5263 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5264 {
5265         int i;
5266
5267         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5268
5269         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5270                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5271
5272         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5273                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5274 }
5275
5276 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5277         .set = gfx_v9_0_set_eop_interrupt_state,
5278         .process = gfx_v9_0_eop_irq,
5279 };
5280
5281 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5282         .set = gfx_v9_0_set_priv_reg_fault_state,
5283         .process = gfx_v9_0_priv_reg_irq,
5284 };
5285
5286 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5287         .set = gfx_v9_0_set_priv_inst_fault_state,
5288         .process = gfx_v9_0_priv_inst_irq,
5289 };
5290
5291 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5292         .set = gfx_v9_0_set_cp_ecc_error_state,
5293         .process = gfx_v9_0_cp_ecc_error_irq,
5294 };
5295
5296
5297 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5298 {
5299         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5300         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5301
5302         adev->gfx.priv_reg_irq.num_types = 1;
5303         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5304
5305         adev->gfx.priv_inst_irq.num_types = 1;
5306         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5307
5308         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5309         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5310 }
5311
5312 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5313 {
5314         switch (adev->asic_type) {
5315         case CHIP_VEGA10:
5316         case CHIP_VEGA12:
5317         case CHIP_VEGA20:
5318         case CHIP_RAVEN:
5319                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5320                 break;
5321         default:
5322                 break;
5323         }
5324 }
5325
5326 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5327 {
5328         /* init asic gds info */
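        /* The hex values below are byte sizes: 0x10000 is 64 KiB of GDS
         * on Vega, 0x1000 is 4 KiB on Raven.
         */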
5329         switch (adev->asic_type) {
5330         case CHIP_VEGA10:
5331         case CHIP_VEGA12:
5332         case CHIP_VEGA20:
5333                 adev->gds.gds_size = 0x10000;
5334                 break;
5335         case CHIP_RAVEN:
5336                 adev->gds.gds_size = 0x1000;
5337                 break;
5338         default:
5339                 adev->gds.gds_size = 0x10000;
5340                 break;
5341         }
5342
5343         switch (adev->asic_type) {
5344         case CHIP_VEGA10:
5345         case CHIP_VEGA20:
5346                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5347                 break;
5348         case CHIP_VEGA12:
5349                 adev->gds.gds_compute_max_wave_id = 0x27f;
5350                 break;
5351         case CHIP_RAVEN:
5352                 if (adev->rev_id >= 0x8)
5353                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5354                 else
5355                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5356                 break;
5357         default:
5358                 /* this really depends on the chip */
5359                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5360                 break;
5361         }
5362
5363         adev->gds.gws_size = 64;
5364         adev->gds.oa_size = 16;
5365 }
5366
5367 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5368                                                  u32 bitmap)
5369 {
5370         u32 data;
5371
5372         if (!bitmap)
5373                 return;
5374
5375         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5376         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5377
5378         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5379 }
5380
5381 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5382 {
5383         u32 data, mask;
5384
5385         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5386         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5387
5388         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5389         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5390
5391         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5392
5393         return (~data) & mask;
5394 }
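/* Worked example (hypothetical harvest config): with max_cu_per_sh = 14,
 * amdgpu_gfx_create_bitmask() yields mask = 0x3fff; if CUs 12 and 13 are
 * inactive (data = 0x3000), the function returns ~0x3000 & 0x3fff =
 * 0x0fff, i.e. CUs 0-11 active.
 */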
5395
5396 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5397                                  struct amdgpu_cu_info *cu_info)
5398 {
5399         int i, j, k, counter, active_cu_number = 0;
5400         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5401         unsigned disable_masks[4 * 2];
5402
5403         if (!adev || !cu_info)
5404                 return -EINVAL;
5405
5406         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5407
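        /* Walk every shader-engine / shader-array pair: select it via
         * GRBM, apply any user-requested CU disable mask, then read back
         * and record the active-CU bitmap and the always-on (AO) subset.
         */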
5408         mutex_lock(&adev->grbm_idx_mutex);
5409         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5410                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5411                         mask = 1;
5412                         ao_bitmap = 0;
5413                         counter = 0;
5414                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5415                         if (i < 4 && j < 2)
5416                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5417                                         adev, disable_masks[i * 2 + j]);
5418                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5419                         cu_info->bitmap[i][j] = bitmap;
5420
5421                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5422                                 if (bitmap & mask) {
5423                                         if (counter < adev->gfx.config.max_cu_per_sh)
5424                                                 ao_bitmap |= mask;
5425                                         counter++;
5426                                 }
5427                                 mask <<= 1;
5428                         }
5429                         active_cu_number += counter;
5430                         if (i < 2 && j < 2)
5431                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5432                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5433                 }
5434         }
5435         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5436         mutex_unlock(&adev->grbm_idx_mutex);
5437
5438         cu_info->number = active_cu_number;
5439         cu_info->ao_cu_mask = ao_cu_mask;
5440         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5441
5442         return 0;
5443 }
5444
5445 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5446 {
5447         .type = AMD_IP_BLOCK_TYPE_GFX,
5448         .major = 9,
5449         .minor = 0,
5450         .rev = 0,
5451         .funcs = &gfx_v9_0_ip_funcs,
5452 };