[linux.git] drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "soc15.h"
29 #include "soc15d.h"
30 #include "amdgpu_atomfirmware.h"
31 #include "amdgpu_pm.h"
32
33 #include "gc/gc_9_0_offset.h"
34 #include "gc/gc_9_0_sh_mask.h"
35 #include "vega10_enum.h"
36 #include "hdp/hdp_4_0_offset.h"
37
38 #include "soc15.h"
39 #include "soc15_common.h"
40 #include "clearstate_gfx9.h"
41 #include "v9_structs.h"
42
43 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
44
45 #include "amdgpu_ras.h"
46
47 #define GFX9_NUM_GFX_RINGS     1
48 #define GFX9_MEC_HPD_SIZE 4096
49 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
50 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
51
52 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
53 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
54 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
55 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
56 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
57 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
58
59 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
60 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
61 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
62 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
63 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
65
66 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
72
73 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
79
80 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
81 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/raven_me.bin");
83 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
84 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
86
87 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
88 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
90 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
94
95 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
96 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
98 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
99 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
101 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
102
103 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
104 {
105         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
106         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
107         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
108         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
125 };
126
127 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
128 {
129         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
130         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
131         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
132         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
139         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
145         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
146         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
147 };
148
149 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
150 {
151         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
152         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
153         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
154         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
160         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
161         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
162 };
163
164 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
165 {
166         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
167         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
168         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
169         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
175         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
176         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
177         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
178         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
188         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
189         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
190 };
191
192 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
193 {
194         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
195         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
196         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
197         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
199         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
200         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
201 };
202
203 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
204 {
205         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
206         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
219         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
220         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
221         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
222         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
223         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
224 };
225
226 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
227 {
228         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
229         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
230         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
231 };
232
233 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
234 {
235         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
236         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
237         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
238         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
239         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
240         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
241         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
242         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
243         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
244         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
245         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
246         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
247         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
248         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
249         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
250         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
251 };
252
253 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
254 {
255         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
256         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
257         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
258         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
259         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
260         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
261         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
262         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
263         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
264         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
265         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
266         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
267         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
268 };
269
270 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
271 {
272         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
273         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
274         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
275         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
276         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 };
281
282 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
283 {
284         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
285         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
286         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
287         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
288         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 };
293
294 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
295 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
296 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
297 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
298
299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
300 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
301 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
302 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
303 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
304                                  struct amdgpu_cu_info *cu_info);
305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
306 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
307 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
308 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
309
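/* Apply the per-ASIC golden register settings, followed by the common GC 9.x set. */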
310 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
311 {
312         switch (adev->asic_type) {
313         case CHIP_VEGA10:
314                 if (!amdgpu_virt_support_skip_setting(adev)) {
315                         soc15_program_register_sequence(adev,
316                                                          golden_settings_gc_9_0,
317                                                          ARRAY_SIZE(golden_settings_gc_9_0));
318                         soc15_program_register_sequence(adev,
319                                                          golden_settings_gc_9_0_vg10,
320                                                          ARRAY_SIZE(golden_settings_gc_9_0_vg10));
321                 }
322                 break;
323         case CHIP_VEGA12:
324                 soc15_program_register_sequence(adev,
325                                                 golden_settings_gc_9_2_1,
326                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
327                 soc15_program_register_sequence(adev,
328                                                 golden_settings_gc_9_2_1_vg12,
329                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
330                 break;
331         case CHIP_VEGA20:
332                 soc15_program_register_sequence(adev,
333                                                 golden_settings_gc_9_0,
334                                                 ARRAY_SIZE(golden_settings_gc_9_0));
335                 soc15_program_register_sequence(adev,
336                                                 golden_settings_gc_9_0_vg20,
337                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
338                 break;
339         case CHIP_RAVEN:
340                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
341                                                 ARRAY_SIZE(golden_settings_gc_9_1));
342                 if (adev->rev_id >= 8)
343                         soc15_program_register_sequence(adev,
344                                                         golden_settings_gc_9_1_rv2,
345                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
346                 else
347                         soc15_program_register_sequence(adev,
348                                                         golden_settings_gc_9_1_rv1,
349                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
350                 break;
351         default:
352                 break;
353         }
354
355         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
356                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
357 }
358
359 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
360 {
361         adev->gfx.scratch.num_reg = 8;
362         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
363         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
364 }
365
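/*
 * Emit a PACKET3 WRITE_DATA that writes @val to register @reg via the
 * selected CP engine, optionally requesting write confirmation.
 */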
366 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
367                                        bool wc, uint32_t reg, uint32_t val)
368 {
369         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
370         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
371                                 WRITE_DATA_DST_SEL(0) |
372                                 (wc ? WR_CONFIRM : 0));
373         amdgpu_ring_write(ring, reg);
374         amdgpu_ring_write(ring, 0);
375         amdgpu_ring_write(ring, val);
376 }
377
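/*
 * Emit a PACKET3 WAIT_REG_MEM that polls a register or memory location
 * until (value & mask) == ref.
 */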
378 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
379                                   int mem_space, int opt, uint32_t addr0,
380                                   uint32_t addr1, uint32_t ref, uint32_t mask,
381                                   uint32_t inv)
382 {
383         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
384         amdgpu_ring_write(ring,
385                                  /* memory (1) or register (0) */
386                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
387                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
388                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
389                                  WAIT_REG_MEM_ENGINE(eng_sel)));
390
391         if (mem_space)
392                 BUG_ON(addr0 & 0x3); /* Dword align */
393         amdgpu_ring_write(ring, addr0);
394         amdgpu_ring_write(ring, addr1);
395         amdgpu_ring_write(ring, ref);
396         amdgpu_ring_write(ring, mask);
397         amdgpu_ring_write(ring, inv); /* poll interval */
398 }
399
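/*
 * Ring smoke test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value shows up, or time out.
 */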
400 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
401 {
402         struct amdgpu_device *adev = ring->adev;
403         uint32_t scratch;
404         uint32_t tmp = 0;
405         unsigned i;
406         int r;
407
408         r = amdgpu_gfx_scratch_get(adev, &scratch);
409         if (r)
410                 return r;
411
412         WREG32(scratch, 0xCAFEDEAD);
413         r = amdgpu_ring_alloc(ring, 3);
414         if (r)
415                 goto error_free_scratch;
416
417         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
418         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
419         amdgpu_ring_write(ring, 0xDEADBEEF);
420         amdgpu_ring_commit(ring);
421
422         for (i = 0; i < adev->usec_timeout; i++) {
423                 tmp = RREG32(scratch);
424                 if (tmp == 0xDEADBEEF)
425                         break;
426                 DRM_UDELAY(1);
427         }
428
429         if (i >= adev->usec_timeout)
430                 r = -ETIMEDOUT;
431
432 error_free_scratch:
433         amdgpu_gfx_scratch_free(adev, scratch);
434         return r;
435 }
436
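/*
 * IB smoke test: submit a small indirect buffer that writes 0xDEADBEEF
 * to a write-back slot, wait on its fence and check the result.
 */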
437 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
438 {
439         struct amdgpu_device *adev = ring->adev;
440         struct amdgpu_ib ib;
441         struct dma_fence *f = NULL;
442
443         unsigned index;
444         uint64_t gpu_addr;
445         uint32_t tmp;
446         long r;
447
448         r = amdgpu_device_wb_get(adev, &index);
449         if (r)
450                 return r;
451
452         gpu_addr = adev->wb.gpu_addr + (index * 4);
453         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
454         memset(&ib, 0, sizeof(ib));
455         r = amdgpu_ib_get(adev, NULL, 16, &ib);
456         if (r)
457                 goto err1;
458
459         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
460         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
461         ib.ptr[2] = lower_32_bits(gpu_addr);
462         ib.ptr[3] = upper_32_bits(gpu_addr);
463         ib.ptr[4] = 0xDEADBEEF;
464         ib.length_dw = 5;
465
466         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
467         if (r)
468                 goto err2;
469
470         r = dma_fence_wait_timeout(f, false, timeout);
471         if (r == 0) {
472                 r = -ETIMEDOUT;
473                 goto err2;
474         } else if (r < 0) {
475                 goto err2;
476         }
477
478         tmp = adev->wb.wb[index];
479         if (tmp == 0xDEADBEEF)
480                 r = 0;
481         else
482                 r = -EINVAL;
483
484 err2:
485         amdgpu_ib_free(adev, &ib, NULL);
486         dma_fence_put(f);
487 err1:
488         amdgpu_device_wb_free(adev, index);
489         return r;
490 }
491
492
493 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
494 {
495         release_firmware(adev->gfx.pfp_fw);
496         adev->gfx.pfp_fw = NULL;
497         release_firmware(adev->gfx.me_fw);
498         adev->gfx.me_fw = NULL;
499         release_firmware(adev->gfx.ce_fw);
500         adev->gfx.ce_fw = NULL;
501         release_firmware(adev->gfx.rlc_fw);
502         adev->gfx.rlc_fw = NULL;
503         release_firmware(adev->gfx.mec_fw);
504         adev->gfx.mec_fw = NULL;
505         release_firmware(adev->gfx.mec2_fw);
506         adev->gfx.mec2_fw = NULL;
507
508         kfree(adev->gfx.rlc.register_list_format);
509 }
510
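/*
 * Parse the RLC v2.1 header extensions: versions, sizes and offsets of
 * the save/restore list CNTL, GPM and SRM blobs.
 */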
511 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
512 {
513         const struct rlc_firmware_header_v2_1 *rlc_hdr;
514
515         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
516         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
517         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
518         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
519         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
520         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
521         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
522         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
523         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
524         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
525         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
526         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
527         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
528         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
529                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
530 }
531
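/*
 * Based on the CP firmware versions, flag whether the ME/PFP and MEC
 * firmware support the combined register write-then-wait operation.
 */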
532 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
533 {
534         adev->gfx.me_fw_write_wait = false;
535         adev->gfx.mec_fw_write_wait = false;
536
537         switch (adev->asic_type) {
538         case CHIP_VEGA10:
539                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
540                     (adev->gfx.me_feature_version >= 42) &&
541                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
542                     (adev->gfx.pfp_feature_version >= 42))
543                         adev->gfx.me_fw_write_wait = true;
544
545                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
546                     (adev->gfx.mec_feature_version >= 42))
547                         adev->gfx.mec_fw_write_wait = true;
548                 break;
549         case CHIP_VEGA12:
550                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
551                     (adev->gfx.me_feature_version >= 44) &&
552                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
553                     (adev->gfx.pfp_feature_version >= 44))
554                         adev->gfx.me_fw_write_wait = true;
555
556                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
557                     (adev->gfx.mec_feature_version >= 44))
558                         adev->gfx.mec_fw_write_wait = true;
559                 break;
560         case CHIP_VEGA20:
561                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
562                     (adev->gfx.me_feature_version >= 44) &&
563                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
564                     (adev->gfx.pfp_feature_version >= 44))
565                         adev->gfx.me_fw_write_wait = true;
566
567                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
568                     (adev->gfx.mec_feature_version >= 44))
569                         adev->gfx.mec_fw_write_wait = true;
570                 break;
571         case CHIP_RAVEN:
572                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
573                     (adev->gfx.me_feature_version >= 42) &&
574                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
575                     (adev->gfx.pfp_feature_version >= 42))
576                         adev->gfx.me_fw_write_wait = true;
577
578                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
579                     (adev->gfx.mec_feature_version >= 42))
580                         adev->gfx.mec_fw_write_wait = true;
581                 break;
582         default:
583                 break;
584         }
585 }
586
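/*
 * Disable GFXOFF on Raven when the RLC firmware is too old (or not
 * RLC v2.1) to support it.
 */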
587 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
588 {
589         switch (adev->asic_type) {
590         case CHIP_VEGA10:
591         case CHIP_VEGA12:
592         case CHIP_VEGA20:
593                 break;
594         case CHIP_RAVEN:
595                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
596                         break;
597                 if ((adev->gfx.rlc_fw_version != 106 &&
598                      adev->gfx.rlc_fw_version < 531) ||
599                     (adev->gfx.rlc_fw_version == 53815) ||
600                     (adev->gfx.rlc_feature_version < 1) ||
601                     !adev->gfx.rlc.is_rlc_v2_1)
602                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
603                 break;
604         default:
605                 break;
606         }
607 }
608
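/*
 * Request and validate the PFP/ME/CE, RLC and MEC(2) firmware images for
 * this ASIC, parse their headers, and register them for PSP loading when
 * that load type is in use.
 */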
609 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
610 {
611         const char *chip_name;
612         char fw_name[30];
613         int err;
614         struct amdgpu_firmware_info *info = NULL;
615         const struct common_firmware_header *header = NULL;
616         const struct gfx_firmware_header_v1_0 *cp_hdr;
617         const struct rlc_firmware_header_v2_0 *rlc_hdr;
618         unsigned int *tmp = NULL;
619         unsigned int i = 0;
620         uint16_t version_major;
621         uint16_t version_minor;
622         uint32_t smu_version;
623
624         DRM_DEBUG("\n");
625
626         switch (adev->asic_type) {
627         case CHIP_VEGA10:
628                 chip_name = "vega10";
629                 break;
630         case CHIP_VEGA12:
631                 chip_name = "vega12";
632                 break;
633         case CHIP_VEGA20:
634                 chip_name = "vega20";
635                 break;
636         case CHIP_RAVEN:
637                 if (adev->rev_id >= 8)
638                         chip_name = "raven2";
639                 else if (adev->pdev->device == 0x15d8)
640                         chip_name = "picasso";
641                 else
642                         chip_name = "raven";
643                 break;
644         default:
645                 BUG();
646         }
647
648         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
649         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
650         if (err)
651                 goto out;
652         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
653         if (err)
654                 goto out;
655         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
656         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
657         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
658
659         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
660         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
661         if (err)
662                 goto out;
663         err = amdgpu_ucode_validate(adev->gfx.me_fw);
664         if (err)
665                 goto out;
666         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
667         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
668         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
669
670         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
671         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
672         if (err)
673                 goto out;
674         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
675         if (err)
676                 goto out;
677         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
678         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
679         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
680
681         /*
682          * For Picasso on AM4 socket boards, we use picasso_rlc_am4.bin
683          * instead of picasso_rlc.bin.
684          * How to tell:
685          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
686          *          or revision >= 0xD8 && revision <= 0xDF
687          * otherwise it is PCO FP5
688          */
689         if (!strcmp(chip_name, "picasso") &&
690                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
691                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
692                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
693         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
694                 (smu_version >= 0x41e2b))
695                 /*
696                  * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
697                  */
698                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
699         else
700                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
701         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
702         if (err)
703                 goto out;
704         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
705         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
706
707         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
708         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
709         if (version_major == 2 && version_minor == 1)
710                 adev->gfx.rlc.is_rlc_v2_1 = true;
711
712         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
713         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
714         adev->gfx.rlc.save_and_restore_offset =
715                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
716         adev->gfx.rlc.clear_state_descriptor_offset =
717                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
718         adev->gfx.rlc.avail_scratch_ram_locations =
719                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
720         adev->gfx.rlc.reg_restore_list_size =
721                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
722         adev->gfx.rlc.reg_list_format_start =
723                         le32_to_cpu(rlc_hdr->reg_list_format_start);
724         adev->gfx.rlc.reg_list_format_separate_start =
725                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
726         adev->gfx.rlc.starting_offsets_start =
727                         le32_to_cpu(rlc_hdr->starting_offsets_start);
728         adev->gfx.rlc.reg_list_format_size_bytes =
729                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
730         adev->gfx.rlc.reg_list_size_bytes =
731                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
732         adev->gfx.rlc.register_list_format =
733                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
734                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
735         if (!adev->gfx.rlc.register_list_format) {
736                 err = -ENOMEM;
737                 goto out;
738         }
739
740         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
741                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
742         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
743                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
744
745         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
746
747         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
748                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
749         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
750                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
751
752         if (adev->gfx.rlc.is_rlc_v2_1)
753                 gfx_v9_0_init_rlc_ext_microcode(adev);
754
755         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
756         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
757         if (err)
758                 goto out;
759         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
760         if (err)
761                 goto out;
762         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
763         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
764         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
765
766
767         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
768         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
769         if (!err) {
770                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
771                 if (err)
772                         goto out;
773                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
774                         adev->gfx.mec2_fw->data;
775                 adev->gfx.mec2_fw_version =
776                         le32_to_cpu(cp_hdr->header.ucode_version);
777                 adev->gfx.mec2_feature_version =
778                         le32_to_cpu(cp_hdr->ucode_feature_version);
779         } else {
780                 err = 0;
781                 adev->gfx.mec2_fw = NULL;
782         }
783
784         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
785                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
786                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
787                 info->fw = adev->gfx.pfp_fw;
788                 header = (const struct common_firmware_header *)info->fw->data;
789                 adev->firmware.fw_size +=
790                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
791
792                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
793                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
794                 info->fw = adev->gfx.me_fw;
795                 header = (const struct common_firmware_header *)info->fw->data;
796                 adev->firmware.fw_size +=
797                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
798
799                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
800                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
801                 info->fw = adev->gfx.ce_fw;
802                 header = (const struct common_firmware_header *)info->fw->data;
803                 adev->firmware.fw_size +=
804                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
805
806                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
807                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
808                 info->fw = adev->gfx.rlc_fw;
809                 header = (const struct common_firmware_header *)info->fw->data;
810                 adev->firmware.fw_size +=
811                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
812
813                 if (adev->gfx.rlc.is_rlc_v2_1 &&
814                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
815                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
816                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
817                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
818                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
819                         info->fw = adev->gfx.rlc_fw;
820                         adev->firmware.fw_size +=
821                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
822
823                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
824                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
825                         info->fw = adev->gfx.rlc_fw;
826                         adev->firmware.fw_size +=
827                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
828
829                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
830                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
831                         info->fw = adev->gfx.rlc_fw;
832                         adev->firmware.fw_size +=
833                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
834                 }
835
836                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
837                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
838                 info->fw = adev->gfx.mec_fw;
839                 header = (const struct common_firmware_header *)info->fw->data;
840                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
841                 adev->firmware.fw_size +=
842                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
843
844                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
845                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
846                 info->fw = adev->gfx.mec_fw;
847                 adev->firmware.fw_size +=
848                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
849
850                 if (adev->gfx.mec2_fw) {
851                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
852                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
853                         info->fw = adev->gfx.mec2_fw;
854                         header = (const struct common_firmware_header *)info->fw->data;
855                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
856                         adev->firmware.fw_size +=
857                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
858                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
859                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
860                         info->fw = adev->gfx.mec2_fw;
861                         adev->firmware.fw_size +=
862                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
863                 }
864
865         }
866
867 out:
868         gfx_v9_0_check_if_need_gfxoff(adev);
869         gfx_v9_0_check_fw_write_wait(adev);
870         if (err) {
871                 dev_err(adev->dev,
872                         "gfx9: Failed to load firmware \"%s\"\n",
873                         fw_name);
874                 release_firmware(adev->gfx.pfp_fw);
875                 adev->gfx.pfp_fw = NULL;
876                 release_firmware(adev->gfx.me_fw);
877                 adev->gfx.me_fw = NULL;
878                 release_firmware(adev->gfx.ce_fw);
879                 adev->gfx.ce_fw = NULL;
880                 release_firmware(adev->gfx.rlc_fw);
881                 adev->gfx.rlc_fw = NULL;
882                 release_firmware(adev->gfx.mec_fw);
883                 adev->gfx.mec_fw = NULL;
884                 release_firmware(adev->gfx.mec2_fw);
885                 adev->gfx.mec2_fw = NULL;
886         }
887         return err;
888 }
889
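/*
 * Count the dwords needed for the clear-state indirect buffer described
 * by gfx9_cs_data.
 */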
890 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
891 {
892         u32 count = 0;
893         const struct cs_section_def *sect = NULL;
894         const struct cs_extent_def *ext = NULL;
895
896         /* begin clear state */
897         count += 2;
898         /* context control state */
899         count += 3;
900
901         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
902                 for (ext = sect->section; ext->extent != NULL; ++ext) {
903                         if (sect->id == SECT_CONTEXT)
904                                 count += 2 + ext->reg_count;
905                         else
906                                 return 0;
907                 }
908         }
909
910         /* end clear state */
911         count += 2;
912         /* clear state */
913         count += 2;
914
915         return count;
916 }
917
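/*
 * Build the clear-state buffer: preamble begin, CONTEXT_CONTROL, the
 * SET_CONTEXT_REG ranges from cs_data, preamble end and CLEAR_STATE.
 */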
918 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
919                                     volatile u32 *buffer)
920 {
921         u32 count = 0, i;
922         const struct cs_section_def *sect = NULL;
923         const struct cs_extent_def *ext = NULL;
924
925         if (adev->gfx.rlc.cs_data == NULL)
926                 return;
927         if (buffer == NULL)
928                 return;
929
930         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
931         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
932
933         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
934         buffer[count++] = cpu_to_le32(0x80000000);
935         buffer[count++] = cpu_to_le32(0x80000000);
936
937         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
938                 for (ext = sect->section; ext->extent != NULL; ++ext) {
939                         if (sect->id == SECT_CONTEXT) {
940                                 buffer[count++] =
941                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
942                                 buffer[count++] = cpu_to_le32(ext->reg_index -
943                                                 PACKET3_SET_CONTEXT_REG_START);
944                                 for (i = 0; i < ext->reg_count; i++)
945                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
946                         } else {
947                                 return;
948                         }
949                 }
950         }
951
952         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
953         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
954
955         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
956         buffer[count++] = cpu_to_le32(0);
957 }
958
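/*
 * Compute the per-SE/SH bitmap of compute units that stay powered on
 * (4 on APUs, 8 on Vega12, 12 otherwise) and program it into the RLC.
 */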
959 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
960 {
961         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
962         uint32_t pg_always_on_cu_num = 2;
963         uint32_t always_on_cu_num;
964         uint32_t i, j, k;
965         uint32_t mask, cu_bitmap, counter;
966
967         if (adev->flags & AMD_IS_APU)
968                 always_on_cu_num = 4;
969         else if (adev->asic_type == CHIP_VEGA12)
970                 always_on_cu_num = 8;
971         else
972                 always_on_cu_num = 12;
973
974         mutex_lock(&adev->grbm_idx_mutex);
975         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
976                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
977                         mask = 1;
978                         cu_bitmap = 0;
979                         counter = 0;
980                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
981
982                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
983                                 if (cu_info->bitmap[i][j] & mask) {
984                                         if (counter == pg_always_on_cu_num)
985                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
986                                         if (counter < always_on_cu_num)
987                                                 cu_bitmap |= mask;
988                                         else
989                                                 break;
990                                         counter++;
991                                 }
992                                 mask <<= 1;
993                         }
994
995                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
996                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
997                 }
998         }
999         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1000         mutex_unlock(&adev->grbm_idx_mutex);
1001 }
1002
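/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU
 * masks used on Raven.
 */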
1003 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1004 {
1005         uint32_t data;
1006
1007         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1008         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1009         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1010         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1011         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1012
1013         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1014         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1015
1016         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1017         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1018
1019         mutex_lock(&adev->grbm_idx_mutex);
1020         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1021         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1022         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1023
1024         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1025         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1026         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1027         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1028         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1029
1030         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1031         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1032         data &= 0x0000FFFF;
1033         data |= 0x00C00000;
1034         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1035
1036         /*
1037          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1038          * programmed in gfx_v9_0_init_always_on_cu_mask()
1039          */
1040
1041         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1042          * but is used here as part of the RLC_LB_CNTL configuration */
1043         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1044         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1045         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1046         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1047         mutex_unlock(&adev->grbm_idx_mutex);
1048
1049         gfx_v9_0_init_always_on_cu_mask(adev);
1050 }
1051
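/* Same LBPW programming as above, with the thresholds used on Vega20. */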
1052 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1053 {
1054         uint32_t data;
1055
1056         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1057         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1058         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1059         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1060         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1061
1062         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1063         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1064
1065         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1066         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1067
1068         mutex_lock(&adev->grbm_idx_mutex);
1069         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1070         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1071         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1072
1073         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1074         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1075         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1076         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1077         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1078
1079         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1080         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1081         data &= 0x0000FFFF;
1082         data |= 0x00C00000;
1083         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1084
1085         /*
1086          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1087          * programmed in gfx_v9_0_init_always_on_cu_mask()
1088          */
1089
1090         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1091          * but is used here as part of the RLC_LB_CNTL configuration */
1092         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1093         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1094         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1095         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1096         mutex_unlock(&adev->grbm_idx_mutex);
1097
1098         gfx_v9_0_init_always_on_cu_mask(adev);
1099 }
1100
1101 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1102 {
1103         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1104 }
1105
1106 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1107 {
1108         return 5;
1109 }
1110
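/*
 * RLC software init: set up the clear-state block, allocate the CP jump
 * table + GDS area on Raven, and program LBPW where supported.
 */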
1111 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1112 {
1113         const struct cs_section_def *cs_data;
1114         int r;
1115
1116         adev->gfx.rlc.cs_data = gfx9_cs_data;
1117
1118         cs_data = adev->gfx.rlc.cs_data;
1119
1120         if (cs_data) {
1121                 /* init clear state block */
1122                 r = amdgpu_gfx_rlc_init_csb(adev);
1123                 if (r)
1124                         return r;
1125         }
1126
1127         if (adev->asic_type == CHIP_RAVEN) {
1128                 /* TODO: double check the cp_table_size for RV */
1129                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1130                 r = amdgpu_gfx_rlc_init_cpt(adev);
1131                 if (r)
1132                         return r;
1133         }
1134
1135         switch (adev->asic_type) {
1136         case CHIP_RAVEN:
1137                 gfx_v9_0_init_lbpw(adev);
1138                 break;
1139         case CHIP_VEGA20:
1140                 gfx_v9_4_init_lbpw(adev);
1141                 break;
1142         default:
1143                 break;
1144         }
1145
1146         return 0;
1147 }
1148
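/* Pin the clear state BO in VRAM and cache its GPU address so the
 * clear state indirect buffer (CSIB) registers can point at it.
 */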
1149 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1150 {
1151         int r;
1152
1153         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1154         if (unlikely(r != 0))
1155                 return r;
1156
1157         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1158                         AMDGPU_GEM_DOMAIN_VRAM);
1159         if (!r)
1160                 adev->gfx.rlc.clear_state_gpu_addr =
1161                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1162
1163         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1164
1165         return r;
1166 }
1167
1168 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1169 {
1170         int r;
1171
1172         if (!adev->gfx.rlc.clear_state_obj)
1173                 return;
1174
1175         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1176         if (likely(r == 0)) {
1177                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1178                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1179         }
1180 }
1181
1182 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1183 {
1184         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1185         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1186 }
1187
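/* Allocate the MEC software state: one GFX9_MEC_HPD_SIZE EOP buffer per
 * enabled compute ring in VRAM, plus a GTT BO holding a copy of the MEC
 * firmware image.
 */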
1188 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1189 {
1190         int r;
1191         u32 *hpd;
1192         const __le32 *fw_data;
1193         unsigned fw_size;
1194         u32 *fw;
1195         size_t mec_hpd_size;
1196
1197         const struct gfx_firmware_header_v1_0 *mec_hdr;
1198
1199         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1200
1201         /* take ownership of the relevant compute queues */
1202         amdgpu_gfx_compute_queue_acquire(adev);
1203         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1204
1205         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1206                                       AMDGPU_GEM_DOMAIN_VRAM,
1207                                       &adev->gfx.mec.hpd_eop_obj,
1208                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1209                                       (void **)&hpd);
1210         if (r) {
1211                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1212                 gfx_v9_0_mec_fini(adev);
1213                 return r;
1214         }
1215
1216         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1217
1218         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1219         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1220
1221         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1222
1223         fw_data = (const __le32 *)
1224                 (adev->gfx.mec_fw->data +
1225                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1226         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1227
1228         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1229                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1230                                       &adev->gfx.mec.mec_fw_obj,
1231                                       &adev->gfx.mec.mec_fw_gpu_addr,
1232                                       (void **)&fw);
1233         if (r) {
1234                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1235                 gfx_v9_0_mec_fini(adev);
1236                 return r;
1237         }
1238
1239         memcpy(fw, fw_data, fw_size);
1240
1241         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1242         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1243
1244         return 0;
1245 }
1246
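/* Read wave state through the SQ indirect register interface
 * (SQ_IND_INDEX selects the wave/SIMD/register, SQ_IND_DATA returns the
 * value); used by the wave debug/dump callbacks below.
 */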
1247 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1248 {
1249         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1250                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1251                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1252                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1253                 (SQ_IND_INDEX__FORCE_READ_MASK));
1254         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1255 }
1256
1257 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1258                            uint32_t wave, uint32_t thread,
1259                            uint32_t regno, uint32_t num, uint32_t *out)
1260 {
1261         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1262                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1263                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1264                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1265                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1266                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1267                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1268         while (num--)
1269                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1270 }
1271
1272 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1273 {
1274         /* type 1 wave data */
1275         dst[(*no_fields)++] = 1;
1276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1279         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1290 }
1291
1292 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1293                                      uint32_t wave, uint32_t start,
1294                                      uint32_t size, uint32_t *dst)
1295 {
1296         wave_read_regs(
1297                 adev, simd, wave, 0,
1298                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1299 }
1300
1301 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1302                                      uint32_t wave, uint32_t thread,
1303                                      uint32_t start, uint32_t size,
1304                                      uint32_t *dst)
1305 {
1306         wave_read_regs(
1307                 adev, simd, wave, thread,
1308                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1309 }
1310
1311 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1312                                   u32 me, u32 pipe, u32 q)
1313 {
1314         soc15_grbm_select(adev, me, pipe, q, 0);
1315 }
1316
1317 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1318         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1319         .select_se_sh = &gfx_v9_0_select_se_sh,
1320         .read_wave_data = &gfx_v9_0_read_wave_data,
1321         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1322         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1323         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1324 };
1325
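/* Early per-ASIC gfx configuration: set the scan converter FIFO sizes,
 * pick (or read back) the golden GB_ADDR_CONFIG value and decode its
 * fields (pipes, banks, RBs, shader engines, interleave size).
 */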
1326 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1327 {
1328         u32 gb_addr_config;
1329         int err;
1330
1331         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1332
1333         switch (adev->asic_type) {
1334         case CHIP_VEGA10:
1335                 adev->gfx.config.max_hw_contexts = 8;
1336                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1340                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1341                 break;
1342         case CHIP_VEGA12:
1343                 adev->gfx.config.max_hw_contexts = 8;
1344                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1345                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1346                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1347                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1348                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1349                 DRM_INFO("fix gfx.config for vega12\n");
1350                 break;
1351         case CHIP_VEGA20:
1352                 adev->gfx.config.max_hw_contexts = 8;
1353                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1354                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1355                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1356                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1357                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1358                 gb_addr_config &= ~0xf3e777ff;
1359                 gb_addr_config |= 0x22014042;
1360                 /* check vbios table if gpu info is not available */
1361                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1362                 if (err)
1363                         return err;
1364                 break;
1365         case CHIP_RAVEN:
1366                 adev->gfx.config.max_hw_contexts = 8;
1367                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1368                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1369                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1370                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1371                 if (adev->rev_id >= 8)
1372                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1373                 else
1374                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1375                 break;
1376         default:
1377                 BUG();
1378                 break;
1379         }
1380
1381         adev->gfx.config.gb_addr_config = gb_addr_config;
1382
1383         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1384                         REG_GET_FIELD(
1385                                         adev->gfx.config.gb_addr_config,
1386                                         GB_ADDR_CONFIG,
1387                                         NUM_PIPES);
1388
1389         adev->gfx.config.max_tile_pipes =
1390                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1391
1392         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1393                         REG_GET_FIELD(
1394                                         adev->gfx.config.gb_addr_config,
1395                                         GB_ADDR_CONFIG,
1396                                         NUM_BANKS);
1397         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1398                         REG_GET_FIELD(
1399                                         adev->gfx.config.gb_addr_config,
1400                                         GB_ADDR_CONFIG,
1401                                         MAX_COMPRESSED_FRAGS);
1402         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1403                         REG_GET_FIELD(
1404                                         adev->gfx.config.gb_addr_config,
1405                                         GB_ADDR_CONFIG,
1406                                         NUM_RB_PER_SE);
1407         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1408                         REG_GET_FIELD(
1409                                         adev->gfx.config.gb_addr_config,
1410                                         GB_ADDR_CONFIG,
1411                                         NUM_SHADER_ENGINES);
1412         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1413                         REG_GET_FIELD(
1414                                         adev->gfx.config.gb_addr_config,
1415                                         GB_ADDR_CONFIG,
1416                                         PIPE_INTERLEAVE_SIZE));
1417
1418         return 0;
1419 }
1420
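/* Allocate one NGG buffer in VRAM, sized per shader engine; a zero
 * size_se falls back to the default, a negative one is rejected.
 */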
1421 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1422                                    struct amdgpu_ngg_buf *ngg_buf,
1423                                    int size_se,
1424                                    int default_size_se)
1425 {
1426         int r;
1427
1428         if (size_se < 0) {
1429                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1430                 return -EINVAL;
1431         }
1432         size_se = size_se ? size_se : default_size_se;
1433
1434         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1435         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1436                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1437                                     &ngg_buf->bo,
1438                                     &ngg_buf->gpu_addr,
1439                                     NULL);
1440         if (r) {
1441                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1442                 return r;
1443         }
1444         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1445
1446         return r;
1447 }
1448
1449 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1450 {
1451         int i;
1452
1453         for (i = 0; i < NGG_BUF_MAX; i++)
1454                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1455                                       &adev->gfx.ngg.buf[i].gpu_addr,
1456                                       NULL);
1457
1458         memset(&adev->gfx.ngg.buf[0], 0,
1459                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1460
1461         adev->gfx.ngg.init = false;
1462
1463         return 0;
1464 }
1465
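/* Set aside a small GDS reservation and create the NGG primitive,
 * position, control sideband and (optional) parameter cache buffers,
 * sized by the amdgpu_*_buf_per_se module parameters.
 */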
1466 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1467 {
1468         int r;
1469
1470         if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1471                 return 0;
1472
1473         /* GDS reserve memory: 64 bytes alignment */
1474         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1475         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1476         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1477         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1478
1479         /* Primitive Buffer */
1480         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1481                                     amdgpu_prim_buf_per_se,
1482                                     64 * 1024);
1483         if (r) {
1484                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1485                 goto err;
1486         }
1487
1488         /* Position Buffer */
1489         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1490                                     amdgpu_pos_buf_per_se,
1491                                     256 * 1024);
1492         if (r) {
1493                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1494                 goto err;
1495         }
1496
1497         /* Control Sideband */
1498         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1499                                     amdgpu_cntl_sb_buf_per_se,
1500                                     256);
1501         if (r) {
1502                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1503                 goto err;
1504         }
1505
1506         /* Parameter Cache, not created by default */
1507         if (amdgpu_param_buf_per_se <= 0)
1508                 goto out;
1509
1510         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1511                                     amdgpu_param_buf_per_se,
1512                                     512 * 1024);
1513         if (r) {
1514                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1515                 goto err;
1516         }
1517
1518 out:
1519         adev->gfx.ngg.init = true;
1520         return 0;
1521 err:
1522         gfx_v9_0_ngg_fini(adev);
1523         return r;
1524 }
1525
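/* Program the WD buffer size and base address registers from the NGG
 * buffers allocated above, then clear the reserved GDS window with a
 * CP DMA_DATA packet submitted on the gfx ring.
 */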
1526 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1527 {
1528         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1529         int r;
1530         u32 data, base;
1531
1532         if (!amdgpu_ngg)
1533                 return 0;
1534
1535         /* Program buffer size */
1536         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1537                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1538         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1539                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1540         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1541
1542         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1544         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1545                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1546         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1547
1548         /* Program buffer base address */
1549         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1550         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1551         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1552
1553         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1556
1557         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1559         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1560
1561         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1564
1565         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1567         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1568
1569         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1572
1573         /* Clear GDS reserved memory */
1574         r = amdgpu_ring_alloc(ring, 17);
1575         if (r) {
1576                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1577                           ring->name, r);
1578                 return r;
1579         }
1580
1581         gfx_v9_0_write_data_to_reg(ring, 0, false,
1582                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1583                                    (adev->gds.gds_size +
1584                                     adev->gfx.ngg.gds_reserve_size));
1585
1586         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1587         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1588                                 PACKET3_DMA_DATA_DST_SEL(1) |
1589                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1590         amdgpu_ring_write(ring, 0);
1591         amdgpu_ring_write(ring, 0);
1592         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1593         amdgpu_ring_write(ring, 0);
1594         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1595                                 adev->gfx.ngg.gds_reserve_size);
1596
1597         gfx_v9_0_write_data_to_reg(ring, 0, false,
1598                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1599
1600         amdgpu_ring_commit(ring);
1601
1602         return 0;
1603 }
1604
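/* Initialize one compute ring: map it to a MEC/pipe/queue triple,
 * assign a doorbell and its slice of the EOP buffer, and hook it up to
 * the matching EOP interrupt source.
 */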
1605 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1606                                       int mec, int pipe, int queue)
1607 {
1608         int r;
1609         unsigned irq_type;
1610         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1611
1612         ring = &adev->gfx.compute_ring[ring_id];
1613
1614         /* mec0 is me1 */
1615         ring->me = mec + 1;
1616         ring->pipe = pipe;
1617         ring->queue = queue;
1618
1619         ring->ring_obj = NULL;
1620         ring->use_doorbell = true;
1621         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1622         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1623                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1624         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1625
1626         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1627                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1628                 + ring->pipe;
1629
1630         /* type-2 packets are deprecated on MEC, use type-3 instead */
1631         r = amdgpu_ring_init(adev, ring, 1024,
1632                              &adev->gfx.eop_irq, irq_type);
1633         if (r)
1634                 return r;
1635
1636
1637         return 0;
1638 }
1639
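/* sw_init: register the CP interrupt sources, load microcode, create
 * the RLC and MEC BOs, set up the gfx and compute rings, and initialize
 * KIQ, the compute MQDs and the NGG buffers.
 */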
1640 static int gfx_v9_0_sw_init(void *handle)
1641 {
1642         int i, j, k, r, ring_id;
1643         struct amdgpu_ring *ring;
1644         struct amdgpu_kiq *kiq;
1645         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646
1647         switch (adev->asic_type) {
1648         case CHIP_VEGA10:
1649         case CHIP_VEGA12:
1650         case CHIP_VEGA20:
1651         case CHIP_RAVEN:
1652                 adev->gfx.mec.num_mec = 2;
1653                 break;
1654         default:
1655                 adev->gfx.mec.num_mec = 1;
1656                 break;
1657         }
1658
1659         adev->gfx.mec.num_pipe_per_mec = 4;
1660         adev->gfx.mec.num_queue_per_pipe = 8;
1661
1662         /* EOP Event */
1663         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1664         if (r)
1665                 return r;
1666
1667         /* Privileged reg */
1668         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1669                               &adev->gfx.priv_reg_irq);
1670         if (r)
1671                 return r;
1672
1673         /* Privileged inst */
1674         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1675                               &adev->gfx.priv_inst_irq);
1676         if (r)
1677                 return r;
1678
1679         /* ECC error */
1680         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1681                               &adev->gfx.cp_ecc_error_irq);
1682         if (r)
1683                 return r;
1684
1685         /* FUE error */
1686         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1687                               &adev->gfx.cp_ecc_error_irq);
1688         if (r)
1689                 return r;
1690
1691         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1692
1693         gfx_v9_0_scratch_init(adev);
1694
1695         r = gfx_v9_0_init_microcode(adev);
1696         if (r) {
1697                 DRM_ERROR("Failed to load gfx firmware!\n");
1698                 return r;
1699         }
1700
1701         r = adev->gfx.rlc.funcs->init(adev);
1702         if (r) {
1703                 DRM_ERROR("Failed to init rlc BOs!\n");
1704                 return r;
1705         }
1706
1707         r = gfx_v9_0_mec_init(adev);
1708         if (r) {
1709                 DRM_ERROR("Failed to init MEC BOs!\n");
1710                 return r;
1711         }
1712
1713         /* set up the gfx ring */
1714         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1715                 ring = &adev->gfx.gfx_ring[i];
1716                 ring->ring_obj = NULL;
1717                 if (!i)
1718                         sprintf(ring->name, "gfx");
1719                 else
1720                         sprintf(ring->name, "gfx_%d", i);
1721                 ring->use_doorbell = true;
1722                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1723                 r = amdgpu_ring_init(adev, ring, 1024,
1724                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1725                 if (r)
1726                         return r;
1727         }
1728
1729         /* set up the compute queues - allocate horizontally across pipes */
1730         ring_id = 0;
1731         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1732                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1733                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1734                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1735                                         continue;
1736
1737                                 r = gfx_v9_0_compute_ring_init(adev,
1738                                                                ring_id,
1739                                                                i, k, j);
1740                                 if (r)
1741                                         return r;
1742
1743                                 ring_id++;
1744                         }
1745                 }
1746         }
1747
1748         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1749         if (r) {
1750                 DRM_ERROR("Failed to init KIQ BOs!\n");
1751                 return r;
1752         }
1753
1754         kiq = &adev->gfx.kiq;
1755         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1756         if (r)
1757                 return r;
1758
1759         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1760         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1761         if (r)
1762                 return r;
1763
1764         adev->gfx.ce_ram_size = 0x8000;
1765
1766         r = gfx_v9_0_gpu_early_init(adev);
1767         if (r)
1768                 return r;
1769
1770         r = gfx_v9_0_ngg_init(adev);
1771         if (r)
1772                 return r;
1773
1774         return 0;
1775 }
1776
1777
1778 static int gfx_v9_0_sw_fini(void *handle)
1779 {
1780         int i;
1781         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1782
1783         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1784                         adev->gfx.ras_if) {
1785                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1786                 struct ras_ih_if ih_info = {
1787                         .head = *ras_if,
1788                 };
1789
1790                 amdgpu_ras_debugfs_remove(adev, ras_if);
1791                 amdgpu_ras_sysfs_remove(adev, ras_if);
1792                 amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
1793                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1794                 kfree(ras_if);
1795         }
1796
1797         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1798                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1799         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1800                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1801
1802         amdgpu_gfx_mqd_sw_fini(adev);
1803         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1804         amdgpu_gfx_kiq_fini(adev);
1805
1806         gfx_v9_0_mec_fini(adev);
1807         gfx_v9_0_ngg_fini(adev);
1808         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1809         if (adev->asic_type == CHIP_RAVEN) {
1810                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1811                                 &adev->gfx.rlc.cp_table_gpu_addr,
1812                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1813         }
1814         gfx_v9_0_free_microcode(adev);
1815
1816         return 0;
1817 }
1818
1819
1820 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1821 {
1822         /* TODO */
1823 }
1824
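/* Steer subsequent register accesses to a specific shader engine,
 * shader array and instance via GRBM_GFX_INDEX; 0xffffffff selects
 * broadcast mode for the corresponding field.
 */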
1825 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1826 {
1827         u32 data;
1828
1829         if (instance == 0xffffffff)
1830                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1831         else
1832                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1833
1834         if (se_num == 0xffffffff)
1835                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1836         else
1837                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1838
1839         if (sh_num == 0xffffffff)
1840                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1841         else
1842                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1843
1844         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1845 }
1846
1847 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1848 {
1849         u32 data, mask;
1850
1851         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1852         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1853
1854         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1855         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1856
1857         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1858                                          adev->gfx.config.max_sh_per_se);
1859
1860         return (~data) & mask;
1861 }
1862
1863 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1864 {
1865         int i, j;
1866         u32 data;
1867         u32 active_rbs = 0;
1868         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1869                                         adev->gfx.config.max_sh_per_se;
1870
1871         mutex_lock(&adev->grbm_idx_mutex);
1872         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1873                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1874                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1875                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1876                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1877                                                rb_bitmap_width_per_sh);
1878                 }
1879         }
1880         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1881         mutex_unlock(&adev->grbm_idx_mutex);
1882
1883         adev->gfx.config.backend_enable_mask = active_rbs;
1884         adev->gfx.config.num_rbs = hweight32(active_rbs);
1885 }
1886
1887 #define DEFAULT_SH_MEM_BASES    (0x6000)
1888 #define FIRST_COMPUTE_VMID      (8)
1889 #define LAST_COMPUTE_VMID       (16)
1890 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1891 {
1892         int i;
1893         uint32_t sh_mem_config;
1894         uint32_t sh_mem_bases;
1895
1896         /*
1897          * Configure apertures:
1898          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1899          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1900          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1901          */
1902         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1903
1904         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1905                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1906                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1907
1908         mutex_lock(&adev->srbm_mutex);
1909         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1910                 soc15_grbm_select(adev, 0, 0, 0, i);
1911                 /* CP and shaders */
1912                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1913                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1914         }
1915         soc15_grbm_select(adev, 0, 0, 0, 0);
1916         mutex_unlock(&adev->srbm_mutex);
1917 }
1918
1919 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1920 {
1921         u32 tmp;
1922         int i;
1923
1924         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1925
1926         gfx_v9_0_tiling_mode_table_init(adev);
1927
1928         gfx_v9_0_setup_rb(adev);
1929         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1930         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1931
1932         /* XXX SH_MEM regs */
1933         /* where to put LDS, scratch, GPUVM in FSA64 space */
1934         mutex_lock(&adev->srbm_mutex);
1935         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1936                 soc15_grbm_select(adev, 0, 0, 0, i);
1937                 /* CP and shaders */
1938                 if (i == 0) {
1939                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1940                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1941                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1942                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1943                 } else {
1944                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1945                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1946                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1947                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1948                                 (adev->gmc.private_aperture_start >> 48));
1949                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1950                                 (adev->gmc.shared_aperture_start >> 48));
1951                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1952                 }
1953         }
1954         soc15_grbm_select(adev, 0, 0, 0, 0);
1955
1956         mutex_unlock(&adev->srbm_mutex);
1957
1958         gfx_v9_0_init_compute_vmid(adev);
1959
1960         mutex_lock(&adev->grbm_idx_mutex);
1961         /*
1962          * make sure that the following register writes will be broadcast
1963          * to all the shaders
1964          */
1965         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1966
1967         WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1968                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
1969                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1970                    (adev->gfx.config.sc_prim_fifo_size_backend <<
1971                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1972                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
1973                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1974                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1975                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1976         mutex_unlock(&adev->grbm_idx_mutex);
1977
1978 }
1979
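/* Wait for the RLC serdes CU masters on every SE/SH, and then the
 * non-CU masters, to become idle; gives up after adev->usec_timeout
 * iterations per unit.
 */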
1980 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1981 {
1982         u32 i, j, k;
1983         u32 mask;
1984
1985         mutex_lock(&adev->grbm_idx_mutex);
1986         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1987                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1988                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1989                         for (k = 0; k < adev->usec_timeout; k++) {
1990                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1991                                         break;
1992                                 udelay(1);
1993                         }
1994                         if (k == adev->usec_timeout) {
1995                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1996                                                       0xffffffff, 0xffffffff);
1997                                 mutex_unlock(&adev->grbm_idx_mutex);
1998                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1999                                          i, j);
2000                                 return;
2001                         }
2002                 }
2003         }
2004         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2005         mutex_unlock(&adev->grbm_idx_mutex);
2006
2007         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2008                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2009                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2010                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2011         for (k = 0; k < adev->usec_timeout; k++) {
2012                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2013                         break;
2014                 udelay(1);
2015         }
2016 }
2017
2018 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2019                                                bool enable)
2020 {
2021         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2022
2023         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2024         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2025         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2026         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2027
2028         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2029 }
2030
2031 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2032 {
2033         /* csib */
2034         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2035                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2036         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2037                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2038         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2039                         adev->gfx.rlc.clear_state_size);
2040 }
2041
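/* Walk the RLC register list format blob: record where each indirect
 * block starts and collect the set of unique indirect register offsets
 * referenced by the list.
 */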
2042 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2043                                 int indirect_offset,
2044                                 int list_size,
2045                                 int *unique_indirect_regs,
2046                                 int unique_indirect_reg_count,
2047                                 int *indirect_start_offsets,
2048                                 int *indirect_start_offsets_count,
2049                                 int max_start_offsets_count)
2050 {
2051         int idx;
2052
2053         for (; indirect_offset < list_size; indirect_offset++) {
2054                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2055                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2056                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2057
2058                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2059                         indirect_offset += 2;
2060
2061                         /* look for the matching index */
2062                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2063                                 if (unique_indirect_regs[idx] ==
2064                                         register_list_format[indirect_offset] ||
2065                                         !unique_indirect_regs[idx])
2066                                         break;
2067                         }
2068
2069                         BUG_ON(idx >= unique_indirect_reg_count);
2070
2071                         if (!unique_indirect_regs[idx])
2072                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2073
2074                         indirect_offset++;
2075                 }
2076         }
2077 }
2078
2079 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2080 {
2081         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2082         int unique_indirect_reg_count = 0;
2083
2084         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2085         int indirect_start_offsets_count = 0;
2086
2087         int list_size = 0;
2088         int i = 0, j = 0;
2089         u32 tmp = 0;
2090
2091         u32 *register_list_format =
2092                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2093         if (!register_list_format)
2094                 return -ENOMEM;
2095         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2096                 adev->gfx.rlc.reg_list_format_size_bytes);
2097
2098         /* setup unique_indirect_regs array and indirect_start_offsets array */
2099         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2100         gfx_v9_1_parse_ind_reg_list(register_list_format,
2101                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2102                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2103                                     unique_indirect_regs,
2104                                     unique_indirect_reg_count,
2105                                     indirect_start_offsets,
2106                                     &indirect_start_offsets_count,
2107                                     ARRAY_SIZE(indirect_start_offsets));
2108
2109         /* enable auto inc in case it is disabled */
2110         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2111         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2112         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2113
2114         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2115         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2116                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2117         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2118                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2119                         adev->gfx.rlc.register_restore[i]);
2120
2121         /* load indirect register */
2122         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2123                 adev->gfx.rlc.reg_list_format_start);
2124
2125         /* direct register portion */
2126         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2127                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2128                         register_list_format[i]);
2129
2130         /* indirect register portion */
2131         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2132                 if (register_list_format[i] == 0xFFFFFFFF) {
2133                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2134                         continue;
2135                 }
2136
2137                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2138                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2139
2140                 for (j = 0; j < unique_indirect_reg_count; j++) {
2141                         if (register_list_format[i] == unique_indirect_regs[j]) {
2142                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2143                                 break;
2144                         }
2145                 }
2146
2147                 BUG_ON(j >= unique_indirect_reg_count);
2148
2149                 i++;
2150         }
2151
2152         /* set save/restore list size */
2153         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2154         list_size = list_size >> 1;
2155         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2156                 adev->gfx.rlc.reg_restore_list_size);
2157         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2158
2159         /* write the starting offsets to RLC scratch ram */
2160         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2161                 adev->gfx.rlc.starting_offsets_start);
2162         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2163                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2164                        indirect_start_offsets[i]);
2165
2166         /* load unique indirect regs */
2167         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2168                 if (unique_indirect_regs[i] != 0) {
2169                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2170                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2171                                unique_indirect_regs[i] & 0x3FFFF);
2172
2173                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2174                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2175                                unique_indirect_regs[i] >> 20);
2176                 }
2177         }
2178
2179         kfree(register_list_format);
2180         return 0;
2181 }
2182
2183 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2184 {
2185         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2186 }
2187
2188 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2189                                              bool enable)
2190 {
2191         uint32_t data = 0;
2192         uint32_t default_data = 0;
2193
2194         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2195         if (enable == true) {
2196                 /* enable GFXIP control over CGPG */
2197                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2198                 if(default_data != data)
2199                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2200
2201                 /* update status */
2202                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2203                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2204                 if(default_data != data)
2205                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2206         } else {
2207                 /* restore GFXIP control over CGPG */
2208                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2209                 if(default_data != data)
2210                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2211         }
2212 }
2213
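/* Program the gfx power gating timing parameters (idle poll count, PG
 * delays, GRBM register save idle threshold) and hand CGPG control to
 * the GFXIP when any gfx PG feature is enabled.
 */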
2214 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2215 {
2216         uint32_t data = 0;
2217
2218         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2219                               AMD_PG_SUPPORT_GFX_SMG |
2220                               AMD_PG_SUPPORT_GFX_DMG)) {
2221                 /* init IDLE_POLL_COUNT = 60 */
2222                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2223                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2224                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2225                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2226
2227                 /* init RLC PG Delay */
2228                 data = 0;
2229                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2230                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2231                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2232                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2233                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2234
2235                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2236                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2237                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2238                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2239
2240                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2241                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2242                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2243                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2244
2245                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2246                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2247
2248                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2249                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2250                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2251
2252                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2253         }
2254 }
2255
2256 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2257                                                 bool enable)
2258 {
2259         uint32_t data = 0;
2260         uint32_t default_data = 0;
2261
2262         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2263         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2264                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2265                              enable ? 1 : 0);
2266         if (default_data != data)
2267                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2268 }
2269
2270 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2271                                                 bool enable)
2272 {
2273         uint32_t data = 0;
2274         uint32_t default_data = 0;
2275
2276         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2277         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2278                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2279                              enable ? 1 : 0);
2280         if(default_data != data)
2281                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2282 }
2283
2284 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2285                                         bool enable)
2286 {
2287         uint32_t data = 0;
2288         uint32_t default_data = 0;
2289
2290         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2291         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2292                              CP_PG_DISABLE,
2293                              enable ? 0 : 1);
2294         if(default_data != data)
2295                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2296 }
2297
2298 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2299                                                 bool enable)
2300 {
2301         uint32_t data, default_data;
2302
2303         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2304         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2305                              GFX_POWER_GATING_ENABLE,
2306                              enable ? 1 : 0);
2307         if(default_data != data)
2308                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2309 }
2310
2311 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2312                                                 bool enable)
2313 {
2314         uint32_t data, default_data;
2315
2316         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2317         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2318                              GFX_PIPELINE_PG_ENABLE,
2319                              enable ? 1 : 0);
2320         if(default_data != data)
2321                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2322
2323         if (!enable)
2324                 /* read any GFX register to wake up GFX */
2325                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2326 }
2327
2328 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2329                                                        bool enable)
2330 {
2331         uint32_t data, default_data;
2332
2333         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2334         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2335                              STATIC_PER_CU_PG_ENABLE,
2336                              enable ? 1 : 0);
2337         if(default_data != data)
2338                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2339 }
2340
2341 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2342                                                 bool enable)
2343 {
2344         uint32_t data, default_data;
2345
2346         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2347         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2348                              DYN_PER_CU_PG_ENABLE,
2349                              enable ? 1 : 0);
2350         if(default_data != data)
2351                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2352 }
2353
2354 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2355 {
2356         gfx_v9_0_init_csb(adev);
2357
2358         /*
2359          * The RLC save/restore list is only usable since RLC v2_1,
2360          * and it is needed by the gfxoff feature.
2361          */
2362         if (adev->gfx.rlc.is_rlc_v2_1) {
2363                 gfx_v9_1_init_rlc_save_restore_list(adev);
2364                 gfx_v9_0_enable_save_restore_machine(adev);
2365         }
2366
2367         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2368                               AMD_PG_SUPPORT_GFX_SMG |
2369                               AMD_PG_SUPPORT_GFX_DMG |
2370                               AMD_PG_SUPPORT_CP |
2371                               AMD_PG_SUPPORT_GDS |
2372                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2373                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2374                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2375                 gfx_v9_0_init_gfx_power_gating(adev);
2376         }
2377 }
2378
2379 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2380 {
2381         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2382         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2383         gfx_v9_0_wait_for_rlc_serdes(adev);
2384 }
2385
2386 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2387 {
2388         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2389         udelay(50);
2390         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2391         udelay(50);
2392 }
2393
2394 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2395 {
2396 #ifdef AMDGPU_RLC_DEBUG_RETRY
2397         u32 rlc_ucode_ver;
2398 #endif
2399
2400         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2401         udelay(50);
2402
2403         /* APUs (e.g. carrizo) enable the cp interrupt only after the cp is initialized */
2404         if (!(adev->flags & AMD_IS_APU)) {
2405                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2406                 udelay(50);
2407         }
2408
2409 #ifdef AMDGPU_RLC_DEBUG_RETRY
2410         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2411         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2412         if (rlc_ucode_ver == 0x108) {
2413                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2414                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2415                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2416                  * default is 0x9C4 to create a 100us interval */
2417                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2418                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2419                  * to disable the page fault retry interrupts, default is
2420                  * 0x100 (256) */
2421                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2422         }
2423 #endif
2424 }
2425
2426 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2427 {
2428         const struct rlc_firmware_header_v2_0 *hdr;
2429         const __le32 *fw_data;
2430         unsigned i, fw_size;
2431
2432         if (!adev->gfx.rlc_fw)
2433                 return -EINVAL;
2434
2435         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2436         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2437
2438         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2439                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2440         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2441
2442         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2443                         RLCG_UCODE_LOADING_START_ADDRESS);
2444         for (i = 0; i < fw_size; i++)
2445                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2446         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2447
2448         return 0;
2449 }
2450
2451 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2452 {
2453         int r;
2454
2455         if (amdgpu_sriov_vf(adev)) {
2456                 gfx_v9_0_init_csb(adev);
2457                 return 0;
2458         }
2459
2460         adev->gfx.rlc.funcs->stop(adev);
2461
2462         /* disable CG */
2463         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2464
2465         gfx_v9_0_init_pg(adev);
2466
2467         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2468                 /* legacy rlc firmware loading */
2469                 r = gfx_v9_0_rlc_load_microcode(adev);
2470                 if (r)
2471                         return r;
2472         }
2473
2474         switch (adev->asic_type) {
2475         case CHIP_RAVEN:
2476                 if (amdgpu_lbpw == 0)
2477                         gfx_v9_0_enable_lbpw(adev, false);
2478                 else
2479                         gfx_v9_0_enable_lbpw(adev, true);
2480                 break;
2481         case CHIP_VEGA20:
2482                 if (amdgpu_lbpw > 0)
2483                         gfx_v9_0_enable_lbpw(adev, true);
2484                 else
2485                         gfx_v9_0_enable_lbpw(adev, false);
2486                 break;
2487         default:
2488                 break;
2489         }
2490
2491         adev->gfx.rlc.funcs->start(adev);
2492
2493         return 0;
2494 }
2495
2496 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2497 {
2498         int i;
2499         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2500
2501         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2502         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2503         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2504         if (!enable) {
2505                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2506                         adev->gfx.gfx_ring[i].sched.ready = false;
2507         }
2508         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2509         udelay(50);
2510 }
2511
2512 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2513 {
2514         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2515         const struct gfx_firmware_header_v1_0 *ce_hdr;
2516         const struct gfx_firmware_header_v1_0 *me_hdr;
2517         const __le32 *fw_data;
2518         unsigned i, fw_size;
2519
2520         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2521                 return -EINVAL;
2522
2523         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2524                 adev->gfx.pfp_fw->data;
2525         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2526                 adev->gfx.ce_fw->data;
2527         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2528                 adev->gfx.me_fw->data;
2529
2530         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2531         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2532         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2533
2534         gfx_v9_0_cp_gfx_enable(adev, false);
2535
2536         /* PFP */
2537         fw_data = (const __le32 *)
2538                 (adev->gfx.pfp_fw->data +
2539                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2540         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2541         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2542         for (i = 0; i < fw_size; i++)
2543                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2544         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2545
2546         /* CE */
2547         fw_data = (const __le32 *)
2548                 (adev->gfx.ce_fw->data +
2549                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2550         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2551         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2552         for (i = 0; i < fw_size; i++)
2553                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2554         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2555
2556         /* ME */
2557         fw_data = (const __le32 *)
2558                 (adev->gfx.me_fw->data +
2559                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2560         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2561         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2562         for (i = 0; i < fw_size; i++)
2563                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2564         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2565
2566         return 0;
2567 }
2568
2569 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2570 {
2571         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2572         const struct cs_section_def *sect = NULL;
2573         const struct cs_extent_def *ext = NULL;
2574         int r, i, tmp;
2575
2576         /* init the CP */
2577         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2578         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2579
2580         gfx_v9_0_cp_gfx_enable(adev, true);
2581
2582         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2583         if (r) {
2584                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2585                 return r;
2586         }
2587
2588         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2589         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2590
2591         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2592         amdgpu_ring_write(ring, 0x80000000);
2593         amdgpu_ring_write(ring, 0x80000000);
2594
2595         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2596                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2597                         if (sect->id == SECT_CONTEXT) {
2598                                 amdgpu_ring_write(ring,
2599                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2600                                                ext->reg_count));
2601                                 amdgpu_ring_write(ring,
2602                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2603                                 for (i = 0; i < ext->reg_count; i++)
2604                                         amdgpu_ring_write(ring, ext->extent[i]);
2605                         }
2606                 }
2607         }
2608
2609         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2610         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2611
2612         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2613         amdgpu_ring_write(ring, 0);
2614
2615         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2616         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2617         amdgpu_ring_write(ring, 0x8000);
2618         amdgpu_ring_write(ring, 0x8000);
2619
2620         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2621         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2622                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2623         amdgpu_ring_write(ring, tmp);
2624         amdgpu_ring_write(ring, 0);
2625
2626         amdgpu_ring_commit(ring);
2627
2628         return 0;
2629 }
2630
2631 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2632 {
2633         struct amdgpu_ring *ring;
2634         u32 tmp;
2635         u32 rb_bufsz;
2636         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2637
2638         /* Set the write pointer delay */
2639         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2640
2641         /* set the RB to use vmid 0 */
2642         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2643
2644         /* Set ring buffer size */
2645         ring = &adev->gfx.gfx_ring[0];
2646         rb_bufsz = order_base_2(ring->ring_size / 8);
2647         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2648         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
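             /* RB_BUFSZ is derived from the ring size in qwords (e.g. a 4 KB ring
              * gives order_base_2(512) = 9); RB_BLKSZ is kept two steps smaller. */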
2649 #ifdef __BIG_ENDIAN
2650         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2651 #endif
2652         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2653
2654         /* Initialize the ring buffer's write pointers */
2655         ring->wptr = 0;
2656         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2657         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2658
2659         /* set the wb address whether it's enabled or not */
2660         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2661         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2662         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2663
2664         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2665         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2666         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2667
2668         mdelay(1);
2669         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2670
2671         rb_addr = ring->gpu_addr >> 8;
2672         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2673         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2674
2675         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2676         if (ring->use_doorbell) {
2677                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2678                                     DOORBELL_OFFSET, ring->doorbell_index);
2679                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2680                                     DOORBELL_EN, 1);
2681         } else {
2682                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2683         }
2684         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2685
2686         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2687                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2688         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2689
2690         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2691                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2692
2693
2694         /* start the ring */
2695         gfx_v9_0_cp_gfx_start(adev);
2696         ring->sched.ready = true;
2697
2698         return 0;
2699 }
2700
2701 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2702 {
2703         int i;
2704
2705         if (enable) {
2706                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2707         } else {
2708                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2709                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2710                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2711                         adev->gfx.compute_ring[i].sched.ready = false;
2712                 adev->gfx.kiq.ring.sched.ready = false;
2713         }
2714         udelay(50);
2715 }
2716
2717 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2718 {
2719         const struct gfx_firmware_header_v1_0 *mec_hdr;
2720         const __le32 *fw_data;
2721         unsigned i;
2722         u32 tmp;
2723
2724         if (!adev->gfx.mec_fw)
2725                 return -EINVAL;
2726
2727         gfx_v9_0_cp_compute_enable(adev, false);
2728
2729         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2730         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2731
2732         fw_data = (const __le32 *)
2733                 (adev->gfx.mec_fw->data +
2734                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2735         tmp = 0;
2736         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2737         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2738         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2739
2740         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2741                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2742         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2743                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2744
2745         /* MEC1 */
2746         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2747                          mec_hdr->jt_offset);
2748         for (i = 0; i < mec_hdr->jt_size; i++)
2749                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2750                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2751
2752         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2753                         adev->gfx.mec_fw_version);
2754         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2755
2756         return 0;
2757 }
2758
2759 /* KIQ functions */
2760 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2761 {
2762         uint32_t tmp;
2763         struct amdgpu_device *adev = ring->adev;
2764
2765         /* tell RLC which is KIQ queue */
2766         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2767         tmp &= 0xffffff00;
2768         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2769         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
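             /* program the queue id first, then set bit 7 in a second write; bit 7
              * presumably acts as the activate/valid bit for the KIQ mapping */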
2770         tmp |= 0x80;
2771         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2772 }
2773
2774 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2775 {
2776         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2777         uint64_t queue_mask = 0;
2778         int r, i;
2779
2780         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2781                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2782                         continue;
2783
2784                 /* This situation may be hit in the future if a new HW
2785                  * generation exposes more than 64 queues. If so, the
2786                  * definition of queue_mask needs updating */
2787                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2788                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2789                         break;
2790                 }
2791
2792                 queue_mask |= (1ull << i);
2793         }
2794
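             /* ring space: 8 dwords for the SET_RESOURCES packet below plus
              * 7 dwords for each per-ring MAP_QUEUES packet */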
2795         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2796         if (r) {
2797                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2798                 return r;
2799         }
2800
2801         /* set resources */
2802         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2803         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2804                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2805         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2806         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2807         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2808         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2809         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2810         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2811         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2812                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2813                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2814                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2815
2816                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2817                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2818                 amdgpu_ring_write(kiq_ring,
2819                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2820                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2821                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2822                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2823                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2824                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2825                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2826                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2827                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
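                     /* ring->me is 1-based (MEC1/MEC2) while the MAP_QUEUES ME field
                      * appears to be 0-based, hence the ring->me == 1 ? 0 : 1 mapping above */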
2828                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2829                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2830                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2831                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2832                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2833         }
2834
2835         r = amdgpu_ring_test_helper(kiq_ring);
2836         if (r)
2837                 DRM_ERROR("KCQ enable failed\n");
2838
2839         return r;
2840 }
2841
2842 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2843 {
2844         struct amdgpu_device *adev = ring->adev;
2845         struct v9_mqd *mqd = ring->mqd_ptr;
2846         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2847         uint32_t tmp;
2848
2849         mqd->header = 0xC0310800;
2850         mqd->compute_pipelinestat_enable = 0x00000001;
2851         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2852         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2853         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2854         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2855         mqd->compute_misc_reserved = 0x00000003;
2856
2857         mqd->dynamic_cu_mask_addr_lo =
2858                 lower_32_bits(ring->mqd_gpu_addr
2859                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860         mqd->dynamic_cu_mask_addr_hi =
2861                 upper_32_bits(ring->mqd_gpu_addr
2862                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2863
2864         eop_base_addr = ring->eop_gpu_addr >> 8;
2865         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2866         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2867
2868         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2869         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2870         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2871                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
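             /* with GFX9_MEC_HPD_SIZE = 4096 bytes this works out to
              * order_base_2(1024) - 1 = 9, i.e. 2^(9+1) = 1024 dwords (4 KB) */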
2872
2873         mqd->cp_hqd_eop_control = tmp;
2874
2875         /* enable doorbell? */
2876         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2877
2878         if (ring->use_doorbell) {
2879                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2880                                     DOORBELL_OFFSET, ring->doorbell_index);
2881                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2882                                     DOORBELL_EN, 1);
2883                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2884                                     DOORBELL_SOURCE, 0);
2885                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2886                                     DOORBELL_HIT, 0);
2887         } else {
2888                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2889                                          DOORBELL_EN, 0);
2890         }
2891
2892         mqd->cp_hqd_pq_doorbell_control = tmp;
2893
2894         /* disable the queue if it's active */
2895         ring->wptr = 0;
2896         mqd->cp_hqd_dequeue_request = 0;
2897         mqd->cp_hqd_pq_rptr = 0;
2898         mqd->cp_hqd_pq_wptr_lo = 0;
2899         mqd->cp_hqd_pq_wptr_hi = 0;
2900
2901         /* set the pointer to the MQD */
2902         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2903         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2904
2905         /* set MQD vmid to 0 */
2906         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2907         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2908         mqd->cp_mqd_control = tmp;
2909
2910         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2911         hqd_gpu_addr = ring->gpu_addr >> 8;
2912         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2913         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2914
2915         /* set up the HQD, this is similar to CP_RB0_CNTL */
2916         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2917         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2918                             (order_base_2(ring->ring_size / 4) - 1));
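             /* QUEUE_SIZE example: a 4 KB (1024 dword) ring gives order_base_2(1024) - 1 = 9 */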
2919         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2920                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2921 #ifdef __BIG_ENDIAN
2922         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2923 #endif
2924         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2925         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2926         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2927         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2928         mqd->cp_hqd_pq_control = tmp;
2929
2930         /* set the wb address whether it's enabled or not */
2931         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2932         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2933         mqd->cp_hqd_pq_rptr_report_addr_hi =
2934                 upper_32_bits(wb_gpu_addr) & 0xffff;
2935
2936         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2937         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2938         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2939         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2940
2941         tmp = 0;
2942         /* enable the doorbell if requested */
2943         if (ring->use_doorbell) {
2944                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2945                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946                                 DOORBELL_OFFSET, ring->doorbell_index);
2947
2948                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949                                          DOORBELL_EN, 1);
2950                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2951                                          DOORBELL_SOURCE, 0);
2952                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2953                                          DOORBELL_HIT, 0);
2954         }
2955
2956         mqd->cp_hqd_pq_doorbell_control = tmp;
2957
2958         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2959         ring->wptr = 0;
2960         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2961
2962         /* set the vmid for the queue */
2963         mqd->cp_hqd_vmid = 0;
2964
2965         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2966         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2967         mqd->cp_hqd_persistent_state = tmp;
2968
2969         /* set MIN_IB_AVAIL_SIZE */
2970         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2971         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2972         mqd->cp_hqd_ib_control = tmp;
2973
2974         /* activate the queue */
2975         mqd->cp_hqd_active = 1;
2976
2977         return 0;
2978 }
2979
2980 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2981 {
2982         struct amdgpu_device *adev = ring->adev;
2983         struct v9_mqd *mqd = ring->mqd_ptr;
2984         int j;
2985
2986         /* disable wptr polling */
2987         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2988
2989         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2990                mqd->cp_hqd_eop_base_addr_lo);
2991         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2992                mqd->cp_hqd_eop_base_addr_hi);
2993
2994         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2995         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2996                mqd->cp_hqd_eop_control);
2997
2998         /* enable doorbell? */
2999         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3000                mqd->cp_hqd_pq_doorbell_control);
3001
3002         /* disable the queue if it's active */
3003         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3004                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3005                 for (j = 0; j < adev->usec_timeout; j++) {
3006                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3007                                 break;
3008                         udelay(1);
3009                 }
3010                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3011                        mqd->cp_hqd_dequeue_request);
3012                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3013                        mqd->cp_hqd_pq_rptr);
3014                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3015                        mqd->cp_hqd_pq_wptr_lo);
3016                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3017                        mqd->cp_hqd_pq_wptr_hi);
3018         }
3019
3020         /* set the pointer to the MQD */
3021         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3022                mqd->cp_mqd_base_addr_lo);
3023         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3024                mqd->cp_mqd_base_addr_hi);
3025
3026         /* set MQD vmid to 0 */
3027         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3028                mqd->cp_mqd_control);
3029
3030         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3031         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3032                mqd->cp_hqd_pq_base_lo);
3033         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3034                mqd->cp_hqd_pq_base_hi);
3035
3036         /* set up the HQD, this is similar to CP_RB0_CNTL */
3037         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3038                mqd->cp_hqd_pq_control);
3039
3040         /* set the wb address whether it's enabled or not */
3041         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3042                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3043         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3044                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3045
3046         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3047         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3048                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3049         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3050                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3051
3052         /* enable the doorbell if requested */
3053         if (ring->use_doorbell) {
3054                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3055                                         (adev->doorbell_index.kiq * 2) << 2);
3056                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3057                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3058         }
3059
3060         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3061                mqd->cp_hqd_pq_doorbell_control);
3062
3063         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3064         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3065                mqd->cp_hqd_pq_wptr_lo);
3066         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3067                mqd->cp_hqd_pq_wptr_hi);
3068
3069         /* set the vmid for the queue */
3070         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3071
3072         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3073                mqd->cp_hqd_persistent_state);
3074
3075         /* activate the queue */
3076         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3077                mqd->cp_hqd_active);
3078
3079         if (ring->use_doorbell)
3080                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3081
3082         return 0;
3083 }
3084
3085 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3086 {
3087         struct amdgpu_device *adev = ring->adev;
3088         int j;
3089
3090         /* disable the queue if it's active */
3091         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3092
3093                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3094
3095                 for (j = 0; j < adev->usec_timeout; j++) {
3096                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3097                                 break;
3098                         udelay(1);
3099                 }
3100
3101                 if (j == adev->usec_timeout) {
3102                         DRM_DEBUG("KIQ dequeue request failed.\n");
3103
3104                         /* Manual disable if dequeue request times out */
3105                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3106                 }
3107
3108                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3109                       0);
3110         }
3111
3112         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3113         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3114         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3115         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3116         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3117         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3118         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3119         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3120
3121         return 0;
3122 }
3123
3124 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3125 {
3126         struct amdgpu_device *adev = ring->adev;
3127         struct v9_mqd *mqd = ring->mqd_ptr;
3128         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3129
3130         gfx_v9_0_kiq_setting(ring);
3131
3132         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3133                 /* reset MQD to a clean status */
3134                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3135                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3136
3137                 /* reset ring buffer */
3138                 ring->wptr = 0;
3139                 amdgpu_ring_clear_ring(ring);
3140
3141                 mutex_lock(&adev->srbm_mutex);
3142                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3143                 gfx_v9_0_kiq_init_register(ring);
3144                 soc15_grbm_select(adev, 0, 0, 0, 0);
3145                 mutex_unlock(&adev->srbm_mutex);
3146         } else {
3147                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3148                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3149                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3150                 mutex_lock(&adev->srbm_mutex);
3151                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3152                 gfx_v9_0_mqd_init(ring);
3153                 gfx_v9_0_kiq_init_register(ring);
3154                 soc15_grbm_select(adev, 0, 0, 0, 0);
3155                 mutex_unlock(&adev->srbm_mutex);
3156
3157                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3158                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3159         }
3160
3161         return 0;
3162 }
3163
3164 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3165 {
3166         struct amdgpu_device *adev = ring->adev;
3167         struct v9_mqd *mqd = ring->mqd_ptr;
3168         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3169
3170         if (!adev->in_gpu_reset && !adev->in_suspend) {
3171                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3172                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3173                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3174                 mutex_lock(&adev->srbm_mutex);
3175                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3176                 gfx_v9_0_mqd_init(ring);
3177                 soc15_grbm_select(adev, 0, 0, 0, 0);
3178                 mutex_unlock(&adev->srbm_mutex);
3179
3180                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3181                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3182         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3183                 /* reset MQD to a clean status */
3184                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3185                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3186
3187                 /* reset ring buffer */
3188                 ring->wptr = 0;
3189                 amdgpu_ring_clear_ring(ring);
3190         } else {
3191                 amdgpu_ring_clear_ring(ring);
3192         }
3193
3194         return 0;
3195 }
3196
3197 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3198 {
3199         struct amdgpu_ring *ring;
3200         int r;
3201
3202         ring = &adev->gfx.kiq.ring;
3203
3204         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3205         if (unlikely(r != 0))
3206                 return r;
3207
3208         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3209         if (unlikely(r != 0))
3210                 return r;
3211
3212         gfx_v9_0_kiq_init_queue(ring);
3213         amdgpu_bo_kunmap(ring->mqd_obj);
3214         ring->mqd_ptr = NULL;
3215         amdgpu_bo_unreserve(ring->mqd_obj);
3216         ring->sched.ready = true;
3217         return 0;
3218 }
3219
3220 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3221 {
3222         struct amdgpu_ring *ring = NULL;
3223         int r = 0, i;
3224
3225         gfx_v9_0_cp_compute_enable(adev, true);
3226
3227         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3228                 ring = &adev->gfx.compute_ring[i];
3229
3230                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3231                 if (unlikely(r != 0))
3232                         goto done;
3233                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3234                 if (!r) {
3235                         r = gfx_v9_0_kcq_init_queue(ring);
3236                         amdgpu_bo_kunmap(ring->mqd_obj);
3237                         ring->mqd_ptr = NULL;
3238                 }
3239                 amdgpu_bo_unreserve(ring->mqd_obj);
3240                 if (r)
3241                         goto done;
3242         }
3243
3244         r = gfx_v9_0_kiq_kcq_enable(adev);
3245 done:
3246         return r;
3247 }
3248
3249 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3250 {
3251         int r, i;
3252         struct amdgpu_ring *ring;
3253
3254         if (!(adev->flags & AMD_IS_APU))
3255                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3256
3257         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3258                 /* legacy firmware loading */
3259                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3260                 if (r)
3261                         return r;
3262
3263                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3264                 if (r)
3265                         return r;
3266         }
3267
3268         r = gfx_v9_0_kiq_resume(adev);
3269         if (r)
3270                 return r;
3271
3272         r = gfx_v9_0_cp_gfx_resume(adev);
3273         if (r)
3274                 return r;
3275
3276         r = gfx_v9_0_kcq_resume(adev);
3277         if (r)
3278                 return r;
3279
3280         ring = &adev->gfx.gfx_ring[0];
3281         r = amdgpu_ring_test_helper(ring);
3282         if (r)
3283                 return r;
3284
3285         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3286                 ring = &adev->gfx.compute_ring[i];
3287                 amdgpu_ring_test_helper(ring);
3288         }
3289
3290         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3291
3292         return 0;
3293 }
3294
3295 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3296 {
3297         gfx_v9_0_cp_gfx_enable(adev, enable);
3298         gfx_v9_0_cp_compute_enable(adev, enable);
3299 }
3300
3301 static int gfx_v9_0_hw_init(void *handle)
3302 {
3303         int r;
3304         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3305
3306         gfx_v9_0_init_golden_registers(adev);
3307
3308         gfx_v9_0_constants_init(adev);
3309
3310         r = gfx_v9_0_csb_vram_pin(adev);
3311         if (r)
3312                 return r;
3313
3314         r = adev->gfx.rlc.funcs->resume(adev);
3315         if (r)
3316                 return r;
3317
3318         r = gfx_v9_0_cp_resume(adev);
3319         if (r)
3320                 return r;
3321
3322         r = gfx_v9_0_ngg_en(adev);
3323         if (r)
3324                 return r;
3325
3326         return r;
3327 }
3328
3329 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3330 {
3331         int r, i;
3332         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3333
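             /* each UNMAP_QUEUES packet below is 6 dwords: the PACKET3 header plus 5 payload dwords */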
3334         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3335         if (r)
3336                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3337
3338         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3339                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3340
3341                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3342                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3343                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3344                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3345                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3346                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3347                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3348                 amdgpu_ring_write(kiq_ring, 0);
3349                 amdgpu_ring_write(kiq_ring, 0);
3350                 amdgpu_ring_write(kiq_ring, 0);
3351         }
3352         r = amdgpu_ring_test_helper(kiq_ring);
3353         if (r)
3354                 DRM_ERROR("KCQ disable failed\n");
3355
3356         return r;
3357 }
3358
3359 static int gfx_v9_0_hw_fini(void *handle)
3360 {
3361         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3362
3363         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3364         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3365         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3366
3367         /* disable KCQ to avoid CPC touch memory not valid anymore */
3368         gfx_v9_0_kcq_disable(adev);
3369
3370         if (amdgpu_sriov_vf(adev)) {
3371                 gfx_v9_0_cp_gfx_enable(adev, false);
3372                 /* must disable polling for SRIOV when hw is finished; otherwise the
3373                  * CPC engine may keep fetching a WB address that is no longer valid
3374                  * after sw teardown completes and trigger a DMAR read error on the
3375                  * hypervisor side.
3376                  */
3377                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3378                 return 0;
3379         }
3380
3381         /* Use deinitialize sequence from CAIL when unbinding device from driver,
3382          * otherwise the KIQ hangs when the device is bound back
3383          */
3384         if (!adev->in_gpu_reset && !adev->in_suspend) {
3385                 mutex_lock(&adev->srbm_mutex);
3386                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3387                                 adev->gfx.kiq.ring.pipe,
3388                                 adev->gfx.kiq.ring.queue, 0);
3389                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3390                 soc15_grbm_select(adev, 0, 0, 0, 0);
3391                 mutex_unlock(&adev->srbm_mutex);
3392         }
3393
3394         gfx_v9_0_cp_enable(adev, false);
3395         adev->gfx.rlc.funcs->stop(adev);
3396
3397         gfx_v9_0_csb_vram_unpin(adev);
3398
3399         return 0;
3400 }
3401
3402 static int gfx_v9_0_suspend(void *handle)
3403 {
3404         return gfx_v9_0_hw_fini(handle);
3405 }
3406
3407 static int gfx_v9_0_resume(void *handle)
3408 {
3409         return gfx_v9_0_hw_init(handle);
3410 }
3411
3412 static bool gfx_v9_0_is_idle(void *handle)
3413 {
3414         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3415
3416         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3417                                 GRBM_STATUS, GUI_ACTIVE))
3418                 return false;
3419         else
3420                 return true;
3421 }
3422
3423 static int gfx_v9_0_wait_for_idle(void *handle)
3424 {
3425         unsigned i;
3426         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3427
3428         for (i = 0; i < adev->usec_timeout; i++) {
3429                 if (gfx_v9_0_is_idle(handle))
3430                         return 0;
3431                 udelay(1);
3432         }
3433         return -ETIMEDOUT;
3434 }
3435
3436 static int gfx_v9_0_soft_reset(void *handle)
3437 {
3438         u32 grbm_soft_reset = 0;
3439         u32 tmp;
3440         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3441
3442         /* GRBM_STATUS */
3443         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3444         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3445                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3446                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3447                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3448                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3449                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3450                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3451                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3452                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3453                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3454         }
3455
3456         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3457                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3458                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3459         }
3460
3461         /* GRBM_STATUS2 */
3462         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3463         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3464                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3465                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3466
3467
3468         if (grbm_soft_reset) {
3469                 /* stop the rlc */
3470                 adev->gfx.rlc.funcs->stop(adev);
3471
3472                 /* Disable GFX parsing/prefetching */
3473                 gfx_v9_0_cp_gfx_enable(adev, false);
3474
3475                 /* Disable MEC parsing/prefetching */
3476                 gfx_v9_0_cp_compute_enable(adev, false);
3477
3478                 if (grbm_soft_reset) {
3479                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3480                         tmp |= grbm_soft_reset;
3481                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3482                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3483                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3484
3485                         udelay(50);
3486
3487                         tmp &= ~grbm_soft_reset;
3488                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3489                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3490                 }
3491
3492                 /* Wait a little for things to settle down */
3493                 udelay(50);
3494         }
3495         return 0;
3496 }
3497
3498 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3499 {
3500         uint64_t clock;
3501
3502         mutex_lock(&adev->gfx.gpu_clock_mutex);
3503         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
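             /* writing 1 captures (latches) the 64-bit GPU clock counter so the
              * LSB/MSB reads below form a coherent value */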
3504         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3505                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3506         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3507         return clock;
3508 }
3509
3510 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3511                                           uint32_t vmid,
3512                                           uint32_t gds_base, uint32_t gds_size,
3513                                           uint32_t gws_base, uint32_t gws_size,
3514                                           uint32_t oa_base, uint32_t oa_size)
3515 {
3516         struct amdgpu_device *adev = ring->adev;
3517
3518         /* GDS Base */
3519         gfx_v9_0_write_data_to_reg(ring, 0, false,
3520                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3521                                    gds_base);
3522
3523         /* GDS Size */
3524         gfx_v9_0_write_data_to_reg(ring, 0, false,
3525                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3526                                    gds_size);
3527
3528         /* GWS */
3529         gfx_v9_0_write_data_to_reg(ring, 0, false,
3530                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3531                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3532
3533         /* OA */
3534         gfx_v9_0_write_data_to_reg(ring, 0, false,
3535                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3536                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
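             /* i.e. a contiguous mask of oa_size bits starting at oa_base,
              * e.g. oa_base = 0, oa_size = 4 -> 0x0000000f */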
3537 }
3538
3539 static const u32 vgpr_init_compute_shader[] =
3540 {
3541         0xb07c0000, 0xbe8000ff,
3542         0x000000f8, 0xbf110800,
3543         0x7e000280, 0x7e020280,
3544         0x7e040280, 0x7e060280,
3545         0x7e080280, 0x7e0a0280,
3546         0x7e0c0280, 0x7e0e0280,
3547         0x80808800, 0xbe803200,
3548         0xbf84fff5, 0xbf9c0000,
3549         0xd28c0001, 0x0001007f,
3550         0xd28d0001, 0x0002027e,
3551         0x10020288, 0xb8810904,
3552         0xb7814000, 0xd1196a01,
3553         0x00000301, 0xbe800087,
3554         0xbefc00c1, 0xd89c4000,
3555         0x00020201, 0xd89cc080,
3556         0x00040401, 0x320202ff,
3557         0x00000800, 0x80808100,
3558         0xbf84fff8, 0x7e020280,
3559         0xbf810000, 0x00000000,
3560 };
3561
3562 static const u32 sgpr_init_compute_shader[] =
3563 {
3564         0xb07c0000, 0xbe8000ff,
3565         0x0000005f, 0xbee50080,
3566         0xbe812c65, 0xbe822c65,
3567         0xbe832c65, 0xbe842c65,
3568         0xbe852c65, 0xb77c0005,
3569         0x80808500, 0xbf84fff8,
3570         0xbe800080, 0xbf810000,
3571 };
3572
3573 static const struct soc15_reg_entry vgpr_init_regs[] = {
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3584 };
3585
3586 static const struct soc15_reg_entry sgpr_init_regs[] = {
3587    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3597 };
3598
3599 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3600    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3602    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3603    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3604    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3605    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3606    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3607    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3608    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3609    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3611    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3612    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3614    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3615    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3616    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3617    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3619    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3620    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3621    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3622    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3623    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3624    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3625    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3626    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3627    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3628    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3629    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3630    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3631    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3632 };
3633
3634 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3635 {
3636         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3637         int i, r;
3638
3639         r = amdgpu_ring_alloc(ring, 7);
3640         if (r) {
3641                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3642                         ring->name, r);
3643                 return r;
3644         }
3645
3646         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3647         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3648
3649         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3650         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3651                                 PACKET3_DMA_DATA_DST_SEL(1) |
3652                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3653                                 PACKET3_DMA_DATA_ENGINE(0)));
3654         amdgpu_ring_write(ring, 0);
3655         amdgpu_ring_write(ring, 0);
3656         amdgpu_ring_write(ring, 0);
3657         amdgpu_ring_write(ring, 0);
3658         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3659                                 adev->gds.gds_size);
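             /* the DMA_DATA packet above (dst_sel 1, src_sel 2) appears to fill the whole
              * VMID0 GDS aperture with a constant, exercising the GDS EDC/ECC path */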
3660
3661         amdgpu_ring_commit(ring);
3662
3663         for (i = 0; i < adev->usec_timeout; i++) {
3664                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3665                         break;
3666                 udelay(1);
3667         }
3668
3669         if (i >= adev->usec_timeout)
3670                 r = -ETIMEDOUT;
3671
3672         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3673
3674         return r;
3675 }
3676
3677 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3678 {
3679         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3680         struct amdgpu_ib ib;
3681         struct dma_fence *f = NULL;
3682         int r, i, j, k;
3683         unsigned total_size, vgpr_offset, sgpr_offset;
3684         u64 gpu_addr;
3685
3686         /* only support when RAS is enabled */
3687         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3688                 return 0;
3689
3690         /* bail if the compute ring is not ready */
3691         if (!ring->sched.ready)
3692                 return 0;
3693
3694         total_size =
3695                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3696         total_size +=
3697                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3698         total_size = ALIGN(total_size, 256);
3699         vgpr_offset = total_size;
3700         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3701         sgpr_offset = total_size;
3702         total_size += sizeof(sgpr_init_compute_shader);
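        /*
         * Rough dword accounting for the command stream built below, per GPR
         * pass: 3 dwords per SET_SH_REG write in the init-regs loop (header +
         * reg offset + value), 4 dwords for the COMPUTE_PGM_LO/HI write
         * (header + offset + lo + hi), 5 dwords for DISPATCH_DIRECT (header +
         * x + y + z + initiator) and 2 dwords for the CS partial flush
         * EVENT_WRITE, all multiplied by 4 to get bytes. The shader binaries
         * are appended after the packets, 256-byte aligned.
         */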
3703
3704         /* allocate an indirect buffer to put the commands in */
3705         memset(&ib, 0, sizeof(ib));
3706         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3707         if (r) {
3708                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3709                 return r;
3710         }
3711
3712         /* load the compute shaders */
3713         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3714                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3715
3716         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3717                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3718
3719         /* init the ib length to 0 */
3720         ib.length_dw = 0;
3721
3722         /* VGPR */
3723         /* write the register state for the compute dispatch */
3724         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3725                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3726                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3727                                                                 - PACKET3_SET_SH_REG_START;
3728                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3729         }
3730         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3731         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3732         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3733         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3734                                                         - PACKET3_SET_SH_REG_START;
3735         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3736         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3737
3738         /* write dispatch packet */
3739         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3740         ib.ptr[ib.length_dw++] = 128; /* x */
3741         ib.ptr[ib.length_dw++] = 1; /* y */
3742         ib.ptr[ib.length_dw++] = 1; /* z */
3743         ib.ptr[ib.length_dw++] =
3744                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3745
3746         /* write CS partial flush packet */
3747         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3748         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3749
3750         /* SGPR */
3751         /* write the register state for the compute dispatch */
3752         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3753                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3754                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3755                                                                 - PACKET3_SET_SH_REG_START;
3756                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3757         }
3758         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3759         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3760         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3761         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3762                                                         - PACKET3_SET_SH_REG_START;
3763         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3764         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3765
3766         /* write dispatch packet */
3767         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3768         ib.ptr[ib.length_dw++] = 128; /* x */
3769         ib.ptr[ib.length_dw++] = 1; /* y */
3770         ib.ptr[ib.length_dw++] = 1; /* z */
3771         ib.ptr[ib.length_dw++] =
3772                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3773
3774         /* write CS partial flush packet */
3775         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3776         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3777
3778         /* schedule the ib on the ring */
3779         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3780         if (r) {
3781                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3782                 goto fail;
3783         }
3784
3785         /* wait for the GPU to finish processing the IB */
3786         r = dma_fence_wait(f, false);
3787         if (r) {
3788                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3789                 goto fail;
3790         }
3791
3792         /* read back registers to clear the counters */
3793         mutex_lock(&adev->grbm_idx_mutex);
3794         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3795                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3796                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3797                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3798                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3799                         }
3800                 }
3801         }
3802         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3803         mutex_unlock(&adev->grbm_idx_mutex);
3804
3805 fail:
3806         amdgpu_ib_free(adev, &ib, NULL);
3807         dma_fence_put(f);
3808
3809         return r;
3810 }
3811
3812 static int gfx_v9_0_early_init(void *handle)
3813 {
3814         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3815
3816         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3817         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3818         gfx_v9_0_set_ring_funcs(adev);
3819         gfx_v9_0_set_irq_funcs(adev);
3820         gfx_v9_0_set_gds_init(adev);
3821         gfx_v9_0_set_rlc_funcs(adev);
3822
3823         return 0;
3824 }
3825
3826 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3827                 struct amdgpu_iv_entry *entry);
3828
3829 static int gfx_v9_0_ecc_late_init(void *handle)
3830 {
3831         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3833         struct ras_ih_if ih_info = {
3834                 .cb = gfx_v9_0_process_ras_data_cb,
3835         };
3836         struct ras_fs_if fs_info = {
3837                 .sysfs_name = "gfx_err_count",
3838                 .debugfs_name = "gfx_err_inject",
3839         };
3840         struct ras_common_if ras_block = {
3841                 .block = AMDGPU_RAS_BLOCK__GFX,
3842                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3843                 .sub_block_index = 0,
3844                 .name = "gfx",
3845         };
3846         int r;
3847
3848         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3849                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3850                 return 0;
3851         }
3852
3853         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3854         if (r)
3855                 return r;
3856
3857         /* this requires IBs, so do it in late init after the IB pool is initialized */
3858         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3859         if (r)
3860                 return r;
3861
3862         /* handle resume path. */
3863         if (*ras_if) {
3864                 /* re-send the RAS TA enable cmd during resume
3865                  * and be prepared to handle failure.
3866                  */
3867                 ih_info.head = **ras_if;
3868                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3869                 if (r) {
3870                         if (r == -EAGAIN) {
3871                                 /* request a GPU reset; this will run again after the reset. */
3872                                 amdgpu_ras_request_reset_on_boot(adev,
3873                                                 AMDGPU_RAS_BLOCK__GFX);
3874                                 return 0;
3875                         }
3876                         /* failed to enable RAS, clean up everything. */
3877                         goto irq;
3878                 }
3879                 /* enabled successfully, continue. */
3880                 goto resume;
3881         }
3882
3883         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3884         if (!*ras_if)
3885                 return -ENOMEM;
3886
3887         **ras_if = ras_block;
3888
3889         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3890         if (r) {
3891                 if (r == -EAGAIN) {
3892                         amdgpu_ras_request_reset_on_boot(adev,
3893                                         AMDGPU_RAS_BLOCK__GFX);
3894                         r = 0;
3895                 }
3896                 goto feature;
3897         }
3898
3899         ih_info.head = **ras_if;
3900         fs_info.head = **ras_if;
3901
3902         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3903         if (r)
3904                 goto interrupt;
3905
3906         amdgpu_ras_debugfs_create(adev, &fs_info);
3907
3908         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3909         if (r)
3910                 goto sysfs;
3911 resume:
3912         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3913         if (r)
3914                 goto irq;
3915
3916         return 0;
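        /*
         * Error unwind: each label below is named after the step that failed
         * and tears down everything that was set up before it, in reverse
         * order (sysfs -> debugfs and IH handler -> RAS feature -> ras_if
         * allocation).
         */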
3917 irq:
3918         amdgpu_ras_sysfs_remove(adev, *ras_if);
3919 sysfs:
3920         amdgpu_ras_debugfs_remove(adev, *ras_if);
3921         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3922 interrupt:
3923         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3924 feature:
3925         kfree(*ras_if);
3926         *ras_if = NULL;
3927         return r;
3928 }
3929
3930 static int gfx_v9_0_late_init(void *handle)
3931 {
3932         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3933         int r;
3934
3935         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3936         if (r)
3937                 return r;
3938
3939         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3940         if (r)
3941                 return r;
3942
3943         r = gfx_v9_0_ecc_late_init(handle);
3944         if (r)
3945                 return r;
3946
3947         return 0;
3948 }
3949
3950 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3951 {
3952         uint32_t rlc_setting;
3953
3954         /* if RLC is not enabled, do nothing */
3955         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3956         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3957                 return false;
3958
3959         return true;
3960 }
3961
3962 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3963 {
3964         uint32_t data;
3965         unsigned i;
3966
3967         data = RLC_SAFE_MODE__CMD_MASK;
3968         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3969         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3970
3971         /* wait for RLC_SAFE_MODE */
3972         for (i = 0; i < adev->usec_timeout; i++) {
3973                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3974                         break;
3975                 udelay(1);
3976         }
3977 }
3978
3979 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3980 {
3981         uint32_t data;
3982
3983         data = RLC_SAFE_MODE__CMD_MASK;
3984         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3985 }
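/*
 * Safe-mode handshake, roughly: the driver writes CMD plus a MESSAGE value of
 * 1 into RLC_SAFE_MODE to request entry, then polls until the RLC firmware
 * clears the CMD bit to acknowledge it. Leaving safe mode only writes CMD
 * with MESSAGE left at 0 and does not wait for an acknowledgement.
 */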
3986
3987 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3988                                                 bool enable)
3989 {
3990         amdgpu_gfx_rlc_enter_safe_mode(adev);
3991
3992         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3993                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3994                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3995                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3996         } else {
3997                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3998                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3999         }
4000
4001         amdgpu_gfx_rlc_exit_safe_mode(adev);
4002 }
4003
4004 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4005                                                 bool enable)
4006 {
4007         /* TODO: double check if we need to perform under safe mode */
4008         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4009
4010         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4011                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4012         else
4013                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4014
4015         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4016                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4017         else
4018                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4019
4020         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4021 }
4022
4023 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4024                                                       bool enable)
4025 {
4026         uint32_t data, def;
4027
4028         amdgpu_gfx_rlc_enter_safe_mode(adev);
4029
4030         /* It is disabled by HW by default */
4031         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4032                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4033                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4034
4035                 if (adev->asic_type != CHIP_VEGA12)
4036                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4037
4038                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4039                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4040                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4041
4042                 /* only for Vega10 & Raven1 */
4043                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4044
4045                 if (def != data)
4046                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4047
4048                 /* MGLS is a global flag to control all MGLS in GFX */
4049                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4050                         /* 2 - RLC memory Light sleep */
4051                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4052                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4053                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4054                                 if (def != data)
4055                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4056                         }
4057                         /* 3 - CP memory Light sleep */
4058                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4059                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4060                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4061                                 if (def != data)
4062                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4063                         }
4064                 }
4065         } else {
4066                 /* 1 - MGCG_OVERRIDE */
4067                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4068
4069                 if (adev->asic_type != CHIP_VEGA12)
4070                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4071
4072                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4073                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4074                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4075                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4076
4077                 if (def != data)
4078                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4079
4080                 /* 2 - disable MGLS in RLC */
4081                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4082                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4083                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4084                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4085                 }
4086
4087                 /* 3 - disable MGLS in CP */
4088                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4089                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4090                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4091                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4092                 }
4093         }
4094
4095         amdgpu_gfx_rlc_exit_safe_mode(adev);
4096 }
4097
4098 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4099                                            bool enable)
4100 {
4101         uint32_t data, def;
4102
4103         amdgpu_gfx_rlc_enter_safe_mode(adev);
4104
4105         /* Enable 3D CGCG/CGLS */
4106         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4107                 /* write cmd to clear the cgcg/cgls override */
4108                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4109                 /* unset CGCG override */
4110                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4111                 /* update CGCG and CGLS override bits */
4112                 if (def != data)
4113                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4114
4115                 /* enable the 3D CGCG FSM (0x0000363f) */
4116                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4117
4118                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4119                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4120                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4121                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4122                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4123                 if (def != data)
4124                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4125
4126                 /* set IDLE_POLL_COUNT(0x00900100) */
4127                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4128                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4129                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4130                 if (def != data)
4131                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4132         } else {
4133                 /* Disable CGCG/CGLS */
4134                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4135                 /* disable cgcg, cgls should be disabled */
4136                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4137                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4138                 /* disable cgcg and cgls in FSM */
4139                 if (def != data)
4140                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4141         }
4142
4143         amdgpu_gfx_rlc_exit_safe_mode(adev);
4144 }
4145
4146 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4147                                                       bool enable)
4148 {
4149         uint32_t def, data;
4150
4151         amdgpu_gfx_rlc_enter_safe_mode(adev);
4152
4153         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4154                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4155                 /* unset CGCG override */
4156                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4157                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4158                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4159                 else
4160                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4161                 /* update CGCG and CGLS override bits */
4162                 if (def != data)
4163                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4164
4165                 /* enable the CGCG FSM (0x0000363F) */
4166                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4167
4168                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4169                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4170                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4171                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4172                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4173                 if (def != data)
4174                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4175
4176                 /* set IDLE_POLL_COUNT(0x00900100) */
4177                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4178                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4179                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4180                 if (def != data)
4181                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4182         } else {
4183                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4184                 /* reset CGCG/CGLS bits */
4185                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4186                 /* disable cgcg and cgls in FSM */
4187                 if (def != data)
4188                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4189         }
4190
4191         amdgpu_gfx_rlc_exit_safe_mode(adev);
4192 }
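/*
 * The magic values above decompose as follows (field shifts inferred from
 * gc_9_0_sh_mask.h, so treat this as a sketch rather than a spec):
 * RLC_CGCG_CGLS_CTRL = 0x0000363f is CGCG_EN (bit 0) | CGLS_EN (bit 1) |
 * CGLS_REP_COMPANSAT_DELAY = 0xf (bits 2..7 -> 0x3c) |
 * CGCG_GFX_IDLE_THRESHOLD = 0x36 (from bit 8 -> 0x3600), and
 * CP_RB_WPTR_POLL_CNTL = 0x00900100 is POLL_FREQUENCY = 0x0100 in the low
 * half with IDLE_POLL_COUNT = 0x0090 in the high half.
 */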
4193
4194 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4195                                             bool enable)
4196 {
4197         if (enable) {
4198                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4199                  * ===  MGCG + MGLS ===
4200                  */
4201                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4202                 /* ===  CGCG/CGLS for GFX 3D only === */
4203                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4204                 /* ===  CGCG + CGLS === */
4205                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4206         } else {
4207                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4208                  * ===  CGCG + CGLS ===
4209                  */
4210                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4211                 /* ===  CGCG/CGLS for GFX 3D only === */
4212                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4213                 /* ===  MGCG + MGLS === */
4214                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4215         }
4216         return 0;
4217 }
4218
4219 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4220         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4221         .set_safe_mode = gfx_v9_0_set_safe_mode,
4222         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4223         .init = gfx_v9_0_rlc_init,
4224         .get_csb_size = gfx_v9_0_get_csb_size,
4225         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4226         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4227         .resume = gfx_v9_0_rlc_resume,
4228         .stop = gfx_v9_0_rlc_stop,
4229         .reset = gfx_v9_0_rlc_reset,
4230         .start = gfx_v9_0_rlc_start
4231 };
4232
4233 static int gfx_v9_0_set_powergating_state(void *handle,
4234                                           enum amd_powergating_state state)
4235 {
4236         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4237         bool enable = (state == AMD_PG_STATE_GATE);
4238
4239         switch (adev->asic_type) {
4240         case CHIP_RAVEN:
4241                 if (!enable) {
4242                         amdgpu_gfx_off_ctrl(adev, false);
4243                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4244                 }
4245                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4246                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4247                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4248                 } else {
4249                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4250                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4251                 }
4252
4253                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4254                         gfx_v9_0_enable_cp_power_gating(adev, true);
4255                 else
4256                         gfx_v9_0_enable_cp_power_gating(adev, false);
4257
4258                 /* update gfx cgpg state */
4259                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4260
4261                 /* update mgcg state */
4262                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4263
4264                 if (enable)
4265                         amdgpu_gfx_off_ctrl(adev, true);
4266                 break;
4267         case CHIP_VEGA12:
4268                 if (!enable) {
4269                         amdgpu_gfx_off_ctrl(adev, false);
4270                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4271                 } else {
4272                         amdgpu_gfx_off_ctrl(adev, true);
4273                 }
4274                 break;
4275         default:
4276                 break;
4277         }
4278
4279         return 0;
4280 }
4281
4282 static int gfx_v9_0_set_clockgating_state(void *handle,
4283                                           enum amd_clockgating_state state)
4284 {
4285         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4286
4287         if (amdgpu_sriov_vf(adev))
4288                 return 0;
4289
4290         switch (adev->asic_type) {
4291         case CHIP_VEGA10:
4292         case CHIP_VEGA12:
4293         case CHIP_VEGA20:
4294         case CHIP_RAVEN:
4295                 gfx_v9_0_update_gfx_clock_gating(adev,
4296                                                  state == AMD_CG_STATE_GATE);
4297                 break;
4298         default:
4299                 break;
4300         }
4301         return 0;
4302 }
4303
4304 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4305 {
4306         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4307         int data;
4308
4309         if (amdgpu_sriov_vf(adev))
4310                 *flags = 0;
4311
4312         /* AMD_CG_SUPPORT_GFX_MGCG */
4313         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4314         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4315                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4316
4317         /* AMD_CG_SUPPORT_GFX_CGCG */
4318         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4319         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4320                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4321
4322         /* AMD_CG_SUPPORT_GFX_CGLS */
4323         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4324                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4325
4326         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4327         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4328         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4329                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4330
4331         /* AMD_CG_SUPPORT_GFX_CP_LS */
4332         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4333         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4334                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4335
4336         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4337         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4338         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4339                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4340
4341         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4342         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4343                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4344 }
4345
4346 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4347 {
4348         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4349 }
4350
4351 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4352 {
4353         struct amdgpu_device *adev = ring->adev;
4354         u64 wptr;
4355
4356         /* XXX check if swapping is necessary on BE */
4357         if (ring->use_doorbell) {
4358                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4359         } else {
4360                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4361                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4362         }
4363
4364         return wptr;
4365 }
4366
4367 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4368 {
4369         struct amdgpu_device *adev = ring->adev;
4370
4371         if (ring->use_doorbell) {
4372                 /* XXX check if swapping is necessary on BE */
4373                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4374                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4375         } else {
4376                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4377                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4378         }
4379 }
4380
4381 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4382 {
4383         struct amdgpu_device *adev = ring->adev;
4384         u32 ref_and_mask, reg_mem_engine;
4385         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4386
4387         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4388                 switch (ring->me) {
4389                 case 1:
4390                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4391                         break;
4392                 case 2:
4393                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4394                         break;
4395                 default:
4396                         return;
4397                 }
4398                 reg_mem_engine = 0;
4399         } else {
4400                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4401                 reg_mem_engine = 1; /* pfp */
4402         }
4403
4404         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4405                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4406                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4407                               ref_and_mask, ref_and_mask, 0x20);
4408 }
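/*
 * The HDP flush above works by writing the per-ring ref_and_mask bit into the
 * NBIO "flush request" register and then, through the same WAIT_REG_MEM
 * packet emitted by gfx_v9_0_wait_reg_mem(), spinning until the matching bit
 * shows up in the "flush done" register; compute rings use the CP2/CP6 bit
 * groups shifted by their pipe number, while the gfx ring uses CP0 and waits
 * on the PFP engine.
 */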
4409
4410 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4411                                         struct amdgpu_job *job,
4412                                         struct amdgpu_ib *ib,
4413                                         uint32_t flags)
4414 {
4415         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4416         u32 header, control = 0;
4417
4418         if (ib->flags & AMDGPU_IB_FLAG_CE)
4419                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4420         else
4421                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4422
4423         control |= ib->length_dw | (vmid << 24);
4424
4425         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4426                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4427
4428                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4429                         gfx_v9_0_ring_emit_de_meta(ring);
4430         }
4431
4432         amdgpu_ring_write(ring, header);
4433         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4434         amdgpu_ring_write(ring,
4435 #ifdef __BIG_ENDIAN
4436                 (2 << 0) |
4437 #endif
4438                 lower_32_bits(ib->gpu_addr));
4439         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4440         amdgpu_ring_write(ring, control);
4441 }
4442
4443 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4444                                           struct amdgpu_job *job,
4445                                           struct amdgpu_ib *ib,
4446                                           uint32_t flags)
4447 {
4448         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4449         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4450
4451         /* Currently, there is a high possibility to get wave ID mismatch
4452          * between ME and GDS, leading to a hw deadlock, because ME generates
4453          * different wave IDs than the GDS expects. This situation happens
4454          * randomly when at least 5 compute pipes use GDS ordered append.
4455          * The wave IDs generated by ME are also wrong after suspend/resume.
4456          * Those are probably bugs somewhere else in the kernel driver.
4457          *
4458          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4459          * GDS to 0 for this ring (me/pipe).
4460          */
4461         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4462                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4463                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4464                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4465         }
4466
4467         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4468         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4469         amdgpu_ring_write(ring,
4470 #ifdef __BIG_ENDIAN
4471                                 (2 << 0) |
4472 #endif
4473                                 lower_32_bits(ib->gpu_addr));
4474         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4475         amdgpu_ring_write(ring, control);
4476 }
4477
4478 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4479                                      u64 seq, unsigned flags)
4480 {
4481         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4482         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4483         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4484
4485         /* RELEASE_MEM - flush caches, send int */
4486         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4487         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4488                                                EOP_TC_NC_ACTION_EN) :
4489                                               (EOP_TCL1_ACTION_EN |
4490                                                EOP_TC_ACTION_EN |
4491                                                EOP_TC_WB_ACTION_EN |
4492                                                EOP_TC_MD_ACTION_EN)) |
4493                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4494                                  EVENT_INDEX(5)));
4495         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4496
4497         /*
4498          * the address should be Qword aligned for a 64bit write, and Dword
4499          * aligned if only the low 32 bits are sent (data high is discarded)
4500          */
4501         if (write64bit)
4502                 BUG_ON(addr & 0x7);
4503         else
4504                 BUG_ON(addr & 0x3);
4505         amdgpu_ring_write(ring, lower_32_bits(addr));
4506         amdgpu_ring_write(ring, upper_32_bits(addr));
4507         amdgpu_ring_write(ring, lower_32_bits(seq));
4508         amdgpu_ring_write(ring, upper_32_bits(seq));
4509         amdgpu_ring_write(ring, 0);
4510 }
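/*
 * Decoding the RELEASE_MEM parameters above (a best-effort reading of the
 * packet format): DATA_SEL picks what gets written to the fence address
 * (1 = low 32 bits of seq, 2 = full 64-bit seq), INT_SEL 2 asks for an
 * interrupt once the write has landed, and the TC/TCL1 action bits select
 * which caches are flushed or written back before the fence value becomes
 * visible.
 */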
4511
4512 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4513 {
4514         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4515         uint32_t seq = ring->fence_drv.sync_seq;
4516         uint64_t addr = ring->fence_drv.gpu_addr;
4517
4518         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4519                               lower_32_bits(addr), upper_32_bits(addr),
4520                               seq, 0xffffffff, 4);
4521 }
4522
4523 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4524                                         unsigned vmid, uint64_t pd_addr)
4525 {
4526         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4527
4528         /* compute doesn't have PFP */
4529         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4530                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4531                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4532                 amdgpu_ring_write(ring, 0x0);
4533         }
4534 }
4535
4536 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4537 {
4538         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4539 }
4540
4541 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4542 {
4543         u64 wptr;
4544
4545         /* XXX check if swapping is necessary on BE */
4546         if (ring->use_doorbell)
4547                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4548         else
4549                 BUG();
4550         return wptr;
4551 }
4552
4553 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4554                                            bool acquire)
4555 {
4556         struct amdgpu_device *adev = ring->adev;
4557         int pipe_num, tmp, reg;
4558         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4559
4560         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4561
4562         /* first me only has 2 entries, GFX and HP3D */
4563         if (ring->me > 0)
4564                 pipe_num -= 2;
4565
4566         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4567         tmp = RREG32(reg);
4568         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4569         WREG32(reg, tmp);
4570 }
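/*
 * The pipe-percent trick above throttles compute pipes that do not hold a
 * reservation: the code relies on the SPI_WCL_PIPE_PERCENT_* registers being
 * laid out back to back, so reg + pipe_num indexes the per-pipe copy, and the
 * pipe is either given the full VALUE mask (no throttling) or 0x1 (a minimal
 * share) depending on whether it is being acquired; pipe_num skips the two
 * GFX/HP3D entries of the first ME.
 */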
4571
4572 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4573                                             struct amdgpu_ring *ring,
4574                                             bool acquire)
4575 {
4576         int i, pipe;
4577         bool reserve;
4578         struct amdgpu_ring *iring;
4579
4580         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4581         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4582         if (acquire)
4583                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4584         else
4585                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4586
4587         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4588                 /* Clear all reservations - everyone reacquires all resources */
4589                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4590                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4591                                                        true);
4592
4593                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4594                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4595                                                        true);
4596         } else {
4597                 /* Lower all pipes without a current reservation */
4598                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4599                         iring = &adev->gfx.gfx_ring[i];
4600                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4601                                                            iring->me,
4602                                                            iring->pipe,
4603                                                            0);
4604                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4605                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4606                 }
4607
4608                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4609                         iring = &adev->gfx.compute_ring[i];
4610                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4611                                                            iring->me,
4612                                                            iring->pipe,
4613                                                            0);
4614                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4615                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4616                 }
4617         }
4618
4619         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4620 }
4621
4622 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4623                                       struct amdgpu_ring *ring,
4624                                       bool acquire)
4625 {
4626         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4627         uint32_t queue_priority = acquire ? 0xf : 0x0;
4628
4629         mutex_lock(&adev->srbm_mutex);
4630         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4631
4632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4633         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4634
4635         soc15_grbm_select(adev, 0, 0, 0, 0);
4636         mutex_unlock(&adev->srbm_mutex);
4637 }
4638
4639 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4640                                                enum drm_sched_priority priority)
4641 {
4642         struct amdgpu_device *adev = ring->adev;
4643         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4644
4645         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4646                 return;
4647
4648         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4649         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4650 }
4651
4652 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4653 {
4654         struct amdgpu_device *adev = ring->adev;
4655
4656         /* XXX check if swapping is necessary on BE */
4657         if (ring->use_doorbell) {
4658                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4659                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4660         } else {
4661                 BUG(); /* only DOORBELL method supported on gfx9 now */
4662         }
4663 }
4664
4665 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4666                                          u64 seq, unsigned int flags)
4667 {
4668         struct amdgpu_device *adev = ring->adev;
4669
4670         /* we only allocate 32 bits for each seq writeback address */
4671         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4672
4673         /* write fence seq to the "addr" */
4674         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4675         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4676                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4677         amdgpu_ring_write(ring, lower_32_bits(addr));
4678         amdgpu_ring_write(ring, upper_32_bits(addr));
4679         amdgpu_ring_write(ring, lower_32_bits(seq));
4680
4681         if (flags & AMDGPU_FENCE_FLAG_INT) {
4682                 /* set register to trigger INT */
4683                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4684                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4685                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4686                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4687                 amdgpu_ring_write(ring, 0);
4688                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4689         }
4690 }
4691
4692 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4693 {
4694         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4695         amdgpu_ring_write(ring, 0);
4696 }
4697
4698 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4699 {
4700         struct v9_ce_ib_state ce_payload = {0};
4701         uint64_t csa_addr;
4702         int cnt;
4703
4704         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4705         csa_addr = amdgpu_csa_vaddr(ring->adev);
4706
4707         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4708         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4709                                  WRITE_DATA_DST_SEL(8) |
4710                                  WR_CONFIRM) |
4711                                  WRITE_DATA_CACHE_POLICY(0));
4712         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4713         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4714         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4715 }
4716
4717 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4718 {
4719         struct v9_de_ib_state de_payload = {0};
4720         uint64_t csa_addr, gds_addr;
4721         int cnt;
4722
4723         csa_addr = amdgpu_csa_vaddr(ring->adev);
4724         gds_addr = csa_addr + 4096;
4725         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4726         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4727
4728         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4729         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4730         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4731                                  WRITE_DATA_DST_SEL(8) |
4732                                  WR_CONFIRM) |
4733                                  WRITE_DATA_CACHE_POLICY(0));
4734         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4735         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4736         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4737 }
4738
4739 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4740 {
4741         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4742         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame begin, 1: frame end */
4743 }
4744
4745 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4746 {
4747         uint32_t dw2 = 0;
4748
4749         if (amdgpu_sriov_vf(ring->adev))
4750                 gfx_v9_0_ring_emit_ce_meta(ring);
4751
4752         gfx_v9_0_ring_emit_tmz(ring, true);
4753
4754         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
4755         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4756                 /* set load_global_config & load_global_uconfig */
4757                 dw2 |= 0x8001;
4758                 /* set load_cs_sh_regs */
4759                 dw2 |= 0x01000000;
4760                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4761                 dw2 |= 0x10002;
4762
4763                 /* set load_ce_ram if a preamble is present */
4764                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4765                         dw2 |= 0x10000000;
4766         } else {
4767                 /* still load_ce_ram if this is the first time a preamble is presented,
4768                  * even though no context switch happens.
4769                  */
4770                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4771                         dw2 |= 0x10000000;
4772         }
4773
4774         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4775         amdgpu_ring_write(ring, dw2);
4776         amdgpu_ring_write(ring, 0);
4777 }
4778
4779 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4780 {
4781         unsigned ret;
4782         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4783         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4784         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4785         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4786         ret = ring->wptr & ring->buf_mask;
4787         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4788         return ret;
4789 }
4790
4791 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4792 {
4793         unsigned cur;
4794         BUG_ON(offset > ring->buf_mask);
4795         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4796
4797         cur = (ring->wptr & ring->buf_mask) - 1;
4798         if (likely(cur > offset))
4799                 ring->ring[offset] = cur - offset;
4800         else
4801                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4802 }
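/*
 * A worked example of the cond_exec patching above, assuming a 4096-dword
 * ring (buf_mask = 0xfff): init_cond_exec() reserves one dword at, say,
 * offset 4090 with the placeholder 0x55aa55aa; when patch_cond_exec() later
 * runs with wptr at index 10, cur = 9, and since wptr has wrapped past the
 * placeholder the count written back is 4096 - 4090 + 9 = 15, i.e. the
 * number of dwords the CP should skip when *cond_exe_gpu_addr is 0.
 */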
4803
4804 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4805 {
4806         struct amdgpu_device *adev = ring->adev;
4807
4808         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4809         amdgpu_ring_write(ring, 0 |     /* src: register*/
4810                                 (5 << 8) |      /* dst: memory */
4811                                 (1 << 20));     /* write confirm */
4812         amdgpu_ring_write(ring, reg);
4813         amdgpu_ring_write(ring, 0);
4814         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4815                                 adev->virt.reg_val_offs * 4));
4816         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4817                                 adev->virt.reg_val_offs * 4));
4818 }
4819
4820 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4821                                     uint32_t val)
4822 {
4823         uint32_t cmd = 0;
4824
4825         switch (ring->funcs->type) {
4826         case AMDGPU_RING_TYPE_GFX:
4827                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4828                 break;
4829         case AMDGPU_RING_TYPE_KIQ:
4830                 cmd = (1 << 16); /* no inc addr */
4831                 break;
4832         default:
4833                 cmd = WR_CONFIRM;
4834                 break;
4835         }
4836         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4837         amdgpu_ring_write(ring, cmd);
4838         amdgpu_ring_write(ring, reg);
4839         amdgpu_ring_write(ring, 0);
4840         amdgpu_ring_write(ring, val);
4841 }
4842
4843 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4844                                         uint32_t val, uint32_t mask)
4845 {
4846         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4847 }
4848
4849 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4850                                                   uint32_t reg0, uint32_t reg1,
4851                                                   uint32_t ref, uint32_t mask)
4852 {
4853         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4854         struct amdgpu_device *adev = ring->adev;
4855         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4856                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4857
4858         if (fw_version_ok)
4859                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4860                                       ref, mask, 0x20);
4861         else
4862                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4863                                                            ref, mask);
4864 }
4865
4866 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4867 {
4868         struct amdgpu_device *adev = ring->adev;
4869         uint32_t value = 0;
4870
4871         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4872         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4873         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4874         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4875         WREG32(mmSQ_CMD, value);
4876 }
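/*
 * Soft recovery above pokes SQ_CMD directly from the CPU: CMD 0x03 with
 * MODE 0x01 and CHECK_VMID set appears to ask the SQ to kill the waves
 * belonging to the given VMID, which is the lightweight "hung job" recovery
 * path tried before falling back to a full GPU reset.
 */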
4877
4878 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4879                                                  enum amdgpu_interrupt_state state)
4880 {
4881         switch (state) {
4882         case AMDGPU_IRQ_STATE_DISABLE:
4883         case AMDGPU_IRQ_STATE_ENABLE:
4884                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4885                                TIME_STAMP_INT_ENABLE,
4886                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4887                 break;
4888         default:
4889                 break;
4890         }
4891 }
4892
4893 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4894                                                      int me, int pipe,
4895                                                      enum amdgpu_interrupt_state state)
4896 {
4897         u32 mec_int_cntl, mec_int_cntl_reg;
4898
4899         /*
4900          * amdgpu controls only the first MEC. That's why this function only
4901          * handles the setting of interrupts for this specific MEC. All other
4902          * pipes' interrupts are set by amdkfd.
4903          */
4904
4905         if (me == 1) {
4906                 switch (pipe) {
4907                 case 0:
4908                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4909                         break;
4910                 case 1:
4911                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4912                         break;
4913                 case 2:
4914                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4915                         break;
4916                 case 3:
4917                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4918                         break;
4919                 default:
4920                         DRM_DEBUG("invalid pipe %d\n", pipe);
4921                         return;
4922                 }
4923         } else {
4924                 DRM_DEBUG("invalid me %d\n", me);
4925                 return;
4926         }
4927
4928         switch (state) {
4929         case AMDGPU_IRQ_STATE_DISABLE:
4930                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4931                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4932                                              TIME_STAMP_INT_ENABLE, 0);
4933                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4934                 break;
4935         case AMDGPU_IRQ_STATE_ENABLE:
4936                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4937                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4938                                              TIME_STAMP_INT_ENABLE, 1);
4939                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4940                 break;
4941         default:
4942                 break;
4943         }
4944 }
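/*
 * Note on the REG_SET_FIELD() calls above: CP_ME1_PIPE0_INT_CNTL is used as
 * the field-description template for every pipe because the PIPE0..PIPE3
 * interrupt control registers appear to share the same bit layout; only the
 * register address (mec_int_cntl_reg) changes per pipe.
 */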
4945
4946 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4947                                              struct amdgpu_irq_src *source,
4948                                              unsigned type,
4949                                              enum amdgpu_interrupt_state state)
4950 {
4951         switch (state) {
4952         case AMDGPU_IRQ_STATE_DISABLE:
4953         case AMDGPU_IRQ_STATE_ENABLE:
4954                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4955                                PRIV_REG_INT_ENABLE,
4956                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4957                 break;
4958         default:
4959                 break;
4960         }
4961
4962         return 0;
4963 }
4964
4965 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4966                                               struct amdgpu_irq_src *source,
4967                                               unsigned type,
4968                                               enum amdgpu_interrupt_state state)
4969 {
4970         switch (state) {
4971         case AMDGPU_IRQ_STATE_DISABLE:
4972         case AMDGPU_IRQ_STATE_ENABLE:
4973                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4974                                PRIV_INSTR_INT_ENABLE,
4975                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4976         default:
4977                 break;
4978         }
4979
4980         return 0;
4981 }
4982
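/*
 * Helpers that toggle the CP ECC error interrupt for one MEC pipe; the
 * token-pasted me/pipe arguments select the matching
 * CP_ME<me>_PIPE<pipe>_INT_CNTL register.
 */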
4983 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4984         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4985                         CP_ECC_ERROR_INT_ENABLE, 1)
4986
4987 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4988         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4989                         CP_ECC_ERROR_INT_ENABLE, 0)
4990
4991 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4992                                               struct amdgpu_irq_src *source,
4993                                               unsigned type,
4994                                               enum amdgpu_interrupt_state state)
4995 {
4996         switch (state) {
4997         case AMDGPU_IRQ_STATE_DISABLE:
4998                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4999                                 CP_ECC_ERROR_INT_ENABLE, 0);
5000                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5001                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5002                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5003                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5004                 break;
5005
5006         case AMDGPU_IRQ_STATE_ENABLE:
5007                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5008                                 CP_ECC_ERROR_INT_ENABLE, 1);
5009                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5010                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5011                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5012                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5013                 break;
5014         default:
5015                 break;
5016         }
5017
5018         return 0;
5019 }
5020
5021
5022 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5023                                             struct amdgpu_irq_src *src,
5024                                             unsigned type,
5025                                             enum amdgpu_interrupt_state state)
5026 {
5027         switch (type) {
5028         case AMDGPU_CP_IRQ_GFX_EOP:
5029                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5030                 break;
5031         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5032                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5033                 break;
5034         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5035                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5036                 break;
5037         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5038                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5039                 break;
5040         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5041                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5042                 break;
5043         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5044                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5045                 break;
5046         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5047                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5048                 break;
5049         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5050                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5051                 break;
5052         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5053                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5054                 break;
5055         default:
5056                 break;
5057         }
5058         return 0;
5059 }
5060
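/*
 * End-of-pipe interrupt handler: decode me/pipe/queue from the IV
 * ring_id (bits [3:2], [1:0] and [6:4] respectively) and run fence
 * processing on the matching gfx or compute ring.
 */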
5061 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5062                             struct amdgpu_irq_src *source,
5063                             struct amdgpu_iv_entry *entry)
5064 {
5065         int i;
5066         u8 me_id, pipe_id, queue_id;
5067         struct amdgpu_ring *ring;
5068
5069         DRM_DEBUG("IH: CP EOP\n");
5070         me_id = (entry->ring_id & 0x0c) >> 2;
5071         pipe_id = (entry->ring_id & 0x03) >> 0;
5072         queue_id = (entry->ring_id & 0x70) >> 4;
5073
5074         switch (me_id) {
5075         case 0:
5076                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5077                 break;
5078         case 1:
5079         case 2:
5080                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5081                         ring = &adev->gfx.compute_ring[i];
5082                         /* Per-queue interrupt is supported for MEC starting from VI.
5083                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5084                          */
5085                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5086                                 amdgpu_fence_process(ring);
5087                 }
5088                 break;
5089         }
5090         return 0;
5091 }
5092
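/*
 * Route a CP fault to the DRM scheduler of the ring that owns the
 * offending me/pipe/queue so its timeout/recovery handling can run.
 */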
5093 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5094                            struct amdgpu_iv_entry *entry)
5095 {
5096         u8 me_id, pipe_id, queue_id;
5097         struct amdgpu_ring *ring;
5098         int i;
5099
5100         me_id = (entry->ring_id & 0x0c) >> 2;
5101         pipe_id = (entry->ring_id & 0x03) >> 0;
5102         queue_id = (entry->ring_id & 0x70) >> 4;
5103
5104         switch (me_id) {
5105         case 0:
5106                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5107                 break;
5108         case 1:
5109         case 2:
5110                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5111                         ring = &adev->gfx.compute_ring[i];
5112                         if (ring->me == me_id && ring->pipe == pipe_id &&
5113                             ring->queue == queue_id)
5114                                 drm_sched_fault(&ring->sched);
5115                 }
5116                 break;
5117         }
5118 }
5119
5120 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5121                                  struct amdgpu_irq_src *source,
5122                                  struct amdgpu_iv_entry *entry)
5123 {
5124         DRM_ERROR("Illegal register access in command stream\n");
5125         gfx_v9_0_fault(adev, entry);
5126         return 0;
5127 }
5128
5129 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5130                                   struct amdgpu_irq_src *source,
5131                                   struct amdgpu_iv_entry *entry)
5132 {
5133         DRM_ERROR("Illegal instruction in command stream\n");
5134         gfx_v9_0_fault(adev, entry);
5135         return 0;
5136 }
5137
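/*
 * RAS callback for GFX errors: flag the SRAM ECC condition to KFD and
 * request a GPU reset; the error is reported as uncorrectable.
 */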
5138 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5139                 struct amdgpu_iv_entry *entry)
5140 {
5141         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5142         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5143         amdgpu_ras_reset_gpu(adev, 0);
5144         return AMDGPU_RAS_UE;
5145 }
5146
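/*
 * Forward a CP ECC error interrupt to the RAS core. If RAS was never
 * initialized for the GFX block (ras_if is NULL), the interrupt is
 * ignored.
 */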
5147 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5148                                   struct amdgpu_irq_src *source,
5149                                   struct amdgpu_iv_entry *entry)
5150 {
5151         struct ras_common_if *ras_if = adev->gfx.ras_if;
5152         struct ras_dispatch_if ih_data = {
5153                 .entry = entry,
5154         };
5155
5156         if (!ras_if)
5157                 return 0;
5158
5159         ih_data.head = *ras_if;
5160
5161         DRM_ERROR("CP ECC ERROR IRQ\n");
5162         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5163         return 0;
5164 }
5165
5166 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5167         .name = "gfx_v9_0",
5168         .early_init = gfx_v9_0_early_init,
5169         .late_init = gfx_v9_0_late_init,
5170         .sw_init = gfx_v9_0_sw_init,
5171         .sw_fini = gfx_v9_0_sw_fini,
5172         .hw_init = gfx_v9_0_hw_init,
5173         .hw_fini = gfx_v9_0_hw_fini,
5174         .suspend = gfx_v9_0_suspend,
5175         .resume = gfx_v9_0_resume,
5176         .is_idle = gfx_v9_0_is_idle,
5177         .wait_for_idle = gfx_v9_0_wait_for_idle,
5178         .soft_reset = gfx_v9_0_soft_reset,
5179         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5180         .set_powergating_state = gfx_v9_0_set_powergating_state,
5181         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5182 };
5183
5184 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5185         .type = AMDGPU_RING_TYPE_GFX,
5186         .align_mask = 0xff,
5187         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5188         .support_64bit_ptrs = true,
5189         .vmhub = AMDGPU_GFXHUB,
5190         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5191         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5192         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5193         .emit_frame_size = /* 242 maximum in total if 16 IBs */
5194                 5 +  /* COND_EXEC */
5195                 7 +  /* PIPELINE_SYNC */
5196                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5197                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5198                 2 + /* VM_FLUSH */
5199                 8 +  /* FENCE for VM_FLUSH */
5200                 20 + /* GDS switch */
5201                 4 + /* double SWITCH_BUFFER,
5202                        the first COND_EXEC jumps to the place just
5203                        prior to this double SWITCH_BUFFER */
5204                 5 + /* COND_EXEC */
5205                 7 + /* HDP_flush */
5206                 4 + /* VGT_flush */
5207                 14 + /* CE_META */
5208                 31 + /* DE_META */
5209                 3 + /* CNTX_CTRL */
5210                 5 + /* HDP_INVL */
5211                 8 + 8 + /* FENCE x2 */
5212                 2, /* SWITCH_BUFFER */
5213         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5214         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5215         .emit_fence = gfx_v9_0_ring_emit_fence,
5216         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5217         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5218         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5219         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5220         .test_ring = gfx_v9_0_ring_test_ring,
5221         .test_ib = gfx_v9_0_ring_test_ib,
5222         .insert_nop = amdgpu_ring_insert_nop,
5223         .pad_ib = amdgpu_ring_generic_pad_ib,
5224         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5225         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5226         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5227         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5228         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5229         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5230         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5231         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5232         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5233 };
5234
5235 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5236         .type = AMDGPU_RING_TYPE_COMPUTE,
5237         .align_mask = 0xff,
5238         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5239         .support_64bit_ptrs = true,
5240         .vmhub = AMDGPU_GFXHUB,
5241         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5242         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5243         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5244         .emit_frame_size =
5245                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5246                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5247                 5 + /* hdp invalidate */
5248                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5249                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5250                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5251                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5252                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5253         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5254         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5255         .emit_fence = gfx_v9_0_ring_emit_fence,
5256         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5257         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5258         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5259         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5260         .test_ring = gfx_v9_0_ring_test_ring,
5261         .test_ib = gfx_v9_0_ring_test_ib,
5262         .insert_nop = amdgpu_ring_insert_nop,
5263         .pad_ib = amdgpu_ring_generic_pad_ib,
5264         .set_priority = gfx_v9_0_ring_set_priority_compute,
5265         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5266         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5267         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5268 };
5269
5270 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5271         .type = AMDGPU_RING_TYPE_KIQ,
5272         .align_mask = 0xff,
5273         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5274         .support_64bit_ptrs = true,
5275         .vmhub = AMDGPU_GFXHUB,
5276         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5277         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5278         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5279         .emit_frame_size =
5280                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5281                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5282                 5 + /* hdp invalidate */
5283                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5284                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5285                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5286                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5287                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5288         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5289         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5290         .test_ring = gfx_v9_0_ring_test_ring,
5291         .insert_nop = amdgpu_ring_insert_nop,
5292         .pad_ib = amdgpu_ring_generic_pad_ib,
5293         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5294         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5295         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5296         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5297 };
5298
5299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5300 {
5301         int i;
5302
5303         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5304
5305         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5306                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5307
5308         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5309                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5310 }
5311
5312 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5313         .set = gfx_v9_0_set_eop_interrupt_state,
5314         .process = gfx_v9_0_eop_irq,
5315 };
5316
5317 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5318         .set = gfx_v9_0_set_priv_reg_fault_state,
5319         .process = gfx_v9_0_priv_reg_irq,
5320 };
5321
5322 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5323         .set = gfx_v9_0_set_priv_inst_fault_state,
5324         .process = gfx_v9_0_priv_inst_irq,
5325 };
5326
5327 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5328         .set = gfx_v9_0_set_cp_ecc_error_state,
5329         .process = gfx_v9_0_cp_ecc_error_irq,
5330 };
5331
5332
5333 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5334 {
5335         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5336         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5337
5338         adev->gfx.priv_reg_irq.num_types = 1;
5339         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5340
5341         adev->gfx.priv_inst_irq.num_types = 1;
5342         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5343
5344         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5345         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5346 }
5347
5348 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5349 {
5350         switch (adev->asic_type) {
5351         case CHIP_VEGA10:
5352         case CHIP_VEGA12:
5353         case CHIP_VEGA20:
5354         case CHIP_RAVEN:
5355                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5356                 break;
5357         default:
5358                 break;
5359         }
5360 }
5361
5362 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5363 {
5364         /* init asic gds info */
5365         switch (adev->asic_type) {
5366         case CHIP_VEGA10:
5367         case CHIP_VEGA12:
5368         case CHIP_VEGA20:
5369                 adev->gds.gds_size = 0x10000;
5370                 break;
5371         case CHIP_RAVEN:
5372                 adev->gds.gds_size = 0x1000;
5373                 break;
5374         default:
5375                 adev->gds.gds_size = 0x10000;
5376                 break;
5377         }
5378
5379         switch (adev->asic_type) {
5380         case CHIP_VEGA10:
5381         case CHIP_VEGA20:
5382                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5383                 break;
5384         case CHIP_VEGA12:
5385                 adev->gds.gds_compute_max_wave_id = 0x27f;
5386                 break;
5387         case CHIP_RAVEN:
5388                 if (adev->rev_id >= 0x8)
5389                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5390                 else
5391                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5392                 break;
5393         default:
5394                 /* this really depends on the chip */
5395                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5396                 break;
5397         }
5398
5399         adev->gds.gws_size = 64;
5400         adev->gds.oa_size = 16;
5401 }
5402
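/*
 * Mark extra CUs inactive in GC_USER_SHADER_ARRAY_CONFIG for the SE/SH
 * currently selected via the GRBM index; the caller is expected to hold
 * grbm_idx_mutex and to have called gfx_v9_0_select_se_sh() first.
 */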
5403 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5404                                                  u32 bitmap)
5405 {
5406         u32 data;
5407
5408         if (!bitmap)
5409                 return;
5410
5411         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5412         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5413
5414         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5415 }
5416
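/*
 * Return the active CU bitmap for the currently selected SE/SH: OR the
 * hardware and user inactive-CU masks, invert the result and limit it to
 * max_cu_per_sh bits.
 */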
5417 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5418 {
5419         u32 data, mask;
5420
5421         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5422         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5423
5424         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5425         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5426
5427         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5428
5429         return (~data) & mask;
5430 }
5431
5432 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5433                                  struct amdgpu_cu_info *cu_info)
5434 {
5435         int i, j, k, counter, active_cu_number = 0;
5436         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5437         unsigned disable_masks[4 * 2];
5438
5439         if (!adev || !cu_info)
5440                 return -EINVAL;
5441
5442         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5443
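        /*
         * Walk every SE/SH: apply any user-requested CU disable mask,
         * record the per-SH active CU bitmap, and accumulate the total
         * active CU count and the always-on (AO) CU mask.
         */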
5444         mutex_lock(&adev->grbm_idx_mutex);
5445         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5446                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5447                         mask = 1;
5448                         ao_bitmap = 0;
5449                         counter = 0;
5450                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5451                         if (i < 4 && j < 2)
5452                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5453                                         adev, disable_masks[i * 2 + j]);
5454                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5455                         cu_info->bitmap[i][j] = bitmap;
5456
5457                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5458                                 if (bitmap & mask) {
5459                                         if (counter < adev->gfx.config.max_cu_per_sh)
5460                                                 ao_bitmap |= mask;
5461                                         counter++;
5462                                 }
5463                                 mask <<= 1;
5464                         }
5465                         active_cu_number += counter;
5466                         if (i < 2 && j < 2)
5467                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5468                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5469                 }
5470         }
5471         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5472         mutex_unlock(&adev->grbm_idx_mutex);
5473
5474         cu_info->number = active_cu_number;
5475         cu_info->ao_cu_mask = ao_cu_mask;
5476         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5477
5478         return 0;
5479 }
5480
5481 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5482 {
5483         .type = AMD_IP_BLOCK_TYPE_GFX,
5484         .major = 9,
5485         .minor = 0,
5486         .rev = 0,
5487         .funcs = &gfx_v9_0_ip_funcs,
5488 };