1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133
134 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
136 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
138 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
140 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
142 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
144 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
146
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151
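/* GC registers captured by the gfx v9 IP-state dump (e.g. for a devcoredump on reset) */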
152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
153         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
154         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
155         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
156         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
157         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
158         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
159         SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
160         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
161         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
162         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
163         SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
164         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
165         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
166         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
167         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
168         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
169         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
170         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
171         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
172         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
173         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
174         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
175         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
177         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
178         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
179         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
180         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
181         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
182         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
183         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
184         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
185         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
186         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
187         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
188         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
189         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
190         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
191         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
192         SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
193         SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
194         SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
195         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
196         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
197         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
198         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
199         SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
200         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
201         SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
202         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
203         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
204         SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
205         SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
206         SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
207         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
208         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
209         SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
210         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
211         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
212         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
213         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
214         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
215         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
216         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
217         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
218         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
219         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
220         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
221         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
222         SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
223         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
224         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
225         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
226         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
227         /* cp header registers */
228         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
229         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
230         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
231         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
232         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
233         /* SE status registers */
234         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
235         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
236         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
237         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
238 };
239
240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
241         /* compute queue registers */
242         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
243         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
244         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
245         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
246         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
247         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
248         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
249         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
250         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
251         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
252         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
253         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
254         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
255         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
256         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
257         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
258         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
259         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
260         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
261         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
262         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
263         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
264         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
265         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
266         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
267         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
268         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
269         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
270         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
271         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
272         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
273         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
274         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
275         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
276         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
277         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
278         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
279 };
280
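/*
 * GFX sub-block indices as understood by the RAS TA firmware, used when
 * injecting or querying RAS errors on a specific sub-block.
 */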
281 enum ta_ras_gfx_subblock {
282         /*CPC*/
283         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
284         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
285         TA_RAS_BLOCK__GFX_CPC_UCODE,
286         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
287         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
288         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
289         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
290         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
291         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
292         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293         /* CPF*/
294         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
295         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
297         TA_RAS_BLOCK__GFX_CPF_TAG,
298         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
299         /* CPG*/
300         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
301         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
303         TA_RAS_BLOCK__GFX_CPG_TAG,
304         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
305         /* GDS*/
306         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
307         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
309         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
310         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
311         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
312         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313         /* SPI*/
314         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
315         /* SQ*/
316         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
317         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318         TA_RAS_BLOCK__GFX_SQ_LDS_D,
319         TA_RAS_BLOCK__GFX_SQ_LDS_I,
320         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
321         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
322         /* SQC (3 ranges)*/
323         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
324         /* SQC range 0*/
325         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
326         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
327                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
328         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
329         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
330         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
331         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
332         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
333         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
334         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
335                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
336         /* SQC range 1*/
337         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
338         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
339                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
340         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
341         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
342         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
343         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
344         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
345         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
346         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
347         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
348         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
349                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
350         /* SQC range 2*/
351         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
352         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
353                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
354         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
355         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
356         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
357         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
358         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
359         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
360         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
361         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
362         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
363                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
364         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
365         /* TA*/
366         TA_RAS_BLOCK__GFX_TA_INDEX_START,
367         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
368         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
369         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
370         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
371         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
372         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373         /* TCA*/
374         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
375         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
377         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378         /* TCC (5 sub-ranges)*/
379         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
380         /* TCC range 0*/
381         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
382         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
383         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
384         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
385         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
386         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
387         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
388         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
389         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
390         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391         /* TCC range 1*/
392         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
393         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
395         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
396                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
397         /* TCC range 2*/
398         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
399         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
401         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
402         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
403         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
404         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
405         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
406         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
407         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
408                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
409         /* TCC range 3*/
410         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
411         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
413         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
414                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
415         /* TCC range 4*/
416         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
417         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
418                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
419         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
420         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
421                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
422         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
423         /* TCI*/
424         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
425         /* TCP*/
426         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
427         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
429         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
430         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
431         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
432         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
433         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
434         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435         /* TD*/
436         TA_RAS_BLOCK__GFX_TD_INDEX_START,
437         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
438         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
439         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
440         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441         /* EA (3 sub-ranges)*/
442         TA_RAS_BLOCK__GFX_EA_INDEX_START,
443         /* EA range 0*/
444         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
445         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
446         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
447         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
448         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
449         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
450         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
451         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
452         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
453         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454         /* EA range 1*/
455         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
456         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
458         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
459         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
460         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
461         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
462         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
463         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464         /* EA range 2*/
465         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
466         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
468         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
469         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
470         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
472         /* UTC VM L2 bank*/
473         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
474         /* UTC VM walker*/
475         TA_RAS_BLOCK__UTC_VML2_WALKER,
476         /* UTC ATC L2 2MB cache*/
477         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
478         /* UTC ATC L2 4KB cache*/
479         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
480         TA_RAS_BLOCK__GFX_MAX
481 };
482
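/*
 * Per-sub-block RAS description: the matching RAS TA index plus bitmasks
 * of the error types supported in hardware and in software for that block.
 */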
483 struct ras_gfx_subblock {
484         unsigned char *name;
485         int ta_subblock;
486         int hw_supported_error_type;
487         int sw_supported_error_type;
488 };
489
490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
491         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
492                 #subblock,                                                     \
493                 TA_RAS_BLOCK__##subblock,                                      \
494                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
495                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
496         }
497
498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
499         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
500         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
501         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
502         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
503         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
505         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
509         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
510         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
511         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
512         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
513         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
514         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
515         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
516                              0),
517         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
518                              0),
519         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
520         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
521         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
522         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
523         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
524         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
525         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
526         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
527                              0, 0),
528         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
529                              0),
530         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
531                              0, 0),
532         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
533                              0),
534         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
535                              0, 0),
536         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
537                              0),
538         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
539                              1),
540         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
541                              0, 0, 0),
542         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
543                              0),
544         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
545                              0),
546         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
547                              0),
548         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
549                              0),
550         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
551                              0),
552         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
553                              0, 0),
554         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
555                              0),
556         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
557                              0),
558         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
559                              0, 0, 0),
560         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
561                              0),
562         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
563                              0),
564         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
565                              0),
566         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
567                              0),
568         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
569                              0),
570         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
571                              0, 0),
572         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
573                              0),
574         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
575         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
576         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
580         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
581         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
582         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
583                              1),
584         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
585                              1),
586         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
587                              1),
588         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
589                              0),
590         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
591                              0),
592         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
593         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
595         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
596         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
597         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
598         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
599         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
600         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
601         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
602         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
603         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
604                              0),
605         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
606         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
607                              0),
608         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
609                              0, 0),
610         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
611                              0),
612         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
613         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
614         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
615         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
616         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
617         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
618         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
619         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
620         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
621         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
622         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
623         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
624         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
625         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
632         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
639         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
643         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
644         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
645         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
646 };
647
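/*
 * "Golden" register settings: (register, bitfield mask, value) entries
 * programmed via soc15_program_register_sequence() during hw init.
 */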
648 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
649 {
650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
667         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
668         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
670 };
671
672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
673 {
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
682         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
683         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
684         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
685         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
692 };
693
694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
695 {
696         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
697         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
698         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
699         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
700         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
701         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
702         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
703         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
704         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
705         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
706         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
707 };
708
709 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
710 {
711         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
712         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
713         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
714         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
715         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
716         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
717         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
718         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
719         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
720         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
721         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
722         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
723         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
724         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
725         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
726         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
727         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
728         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
729         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
730         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
731         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
732         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
733         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
734         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
735 };
736
737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
738 {
739         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
740         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
741         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
742         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
743         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
744         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
745         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
746 };
747
748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
749 {
750         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
751         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
752         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
753         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
754         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
755         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
756         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
757         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
758         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
759         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
760         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
761         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
762         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
763         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
764         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
766         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
767         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
768         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
769 };
770
771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
772 {
773         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
774         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
775         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
776         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
777         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
778         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
779         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
780         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
781         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
782         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
783         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
784         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
785 };
786
787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
788 {
789         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
790         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
791         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
792 };
793
794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
795 {
796         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
797         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
798         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
799         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
800         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
801         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
802         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
803         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
804         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
805         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
806         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
807         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
808         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
809         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
810         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
811         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
812 };
813
814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
815 {
816         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
817         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
818         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
819         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
820         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
821         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
822         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
823         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
824         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
825         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
826         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
827         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
828         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
829 };
830
831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
832 {
833         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
834         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
835         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
836         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
837         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
838         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
839         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
840         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
841         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
842         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
843         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
844 };
845
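/* Registers whose writes are routed through the RLCG interface (e.g. under SR-IOV) */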
846 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
847         {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
848         {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
849 };
850
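/* Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register instances relative to instance 0 */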
851 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
852 {
853         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
854         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 };
862
863 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
864 {
865         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
866         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 };
874
875 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
876 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
877 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
878 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
879
880 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
881 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
885                                 struct amdgpu_cu_info *cu_info);
886 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
887 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
888 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
889 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
890                                           void *ras_error_status);
891 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
892                                      void *inject_if, uint32_t instance_mask);
893 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
894 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
895                                               unsigned int vmid);
896
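/*
 * Emit a PACKET3_SET_RESOURCES on the KIQ ring to hand the given compute
 * queue mask (and no GWS/OA/GDS resources) to the CP scheduler.
 */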
897 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
898                                 uint64_t queue_mask)
899 {
900         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
901         amdgpu_ring_write(kiq_ring,
902                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
903                 /* vmid_mask:0, queue_type:0 (KIQ) */
904                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
905         amdgpu_ring_write(kiq_ring,
906                         lower_32_bits(queue_mask));     /* queue mask lo */
907         amdgpu_ring_write(kiq_ring,
908                         upper_32_bits(queue_mask));     /* queue mask hi */
909         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
910         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
911         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
912         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
913 }
914
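/*
 * Emit a PACKET3_MAP_QUEUES on the KIQ ring to map @ring's hardware queue,
 * pointing the CP at its MQD and wptr polling address.
 */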
915 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
916                                  struct amdgpu_ring *ring)
917 {
918         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
919         uint64_t wptr_addr = ring->wptr_gpu_addr;
920         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
921
922         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
923         /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
924         amdgpu_ring_write(kiq_ring,
925                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
926                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
927                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
928                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
929                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
930                          /*queue_type: normal compute queue */
931                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
932                          /* alloc format: all_on_one_pipe */
933                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
934                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
935                          /* num_queues: must be 1 */
936                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
937         amdgpu_ring_write(kiq_ring,
938                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
939         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
940         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
941         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
942         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
943 }
944
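/*
 * Emit a PACKET3_UNMAP_QUEUES on the KIQ ring; for PREEMPT_QUEUES_NO_UNMAP
 * the current wptr is passed so the queue can be resumed later.
 */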
945 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
946                                    struct amdgpu_ring *ring,
947                                    enum amdgpu_unmap_queues_action action,
948                                    u64 gpu_addr, u64 seq)
949 {
950         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
951
952         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
953         amdgpu_ring_write(kiq_ring, /* action, queue_sel, engine_sel, num_queues */
954                           PACKET3_UNMAP_QUEUES_ACTION(action) |
955                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
956                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
957                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
958         amdgpu_ring_write(kiq_ring,
959                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
960
961         if (action == PREEMPT_QUEUES_NO_UNMAP) {
962                 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
963                 amdgpu_ring_write(kiq_ring, 0);
964                 amdgpu_ring_write(kiq_ring, 0);
965
966         } else {
967                 amdgpu_ring_write(kiq_ring, 0);
968                 amdgpu_ring_write(kiq_ring, 0);
969                 amdgpu_ring_write(kiq_ring, 0);
970         }
971 }
972
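/*
 * Emit a PACKET3_QUERY_STATUS packet for @ring; the doorbell offset and engine
 * select identify the target queue, and the completion address and sequence
 * value are passed in the trailing dwords.
 */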
973 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
974                                    struct amdgpu_ring *ring,
975                                    u64 addr,
976                                    u64 seq)
977 {
978         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
979
980         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
981         amdgpu_ring_write(kiq_ring,
982                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
983                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
984                           PACKET3_QUERY_STATUS_COMMAND(2));
985         /* doorbell offset, eng_sel */
986         amdgpu_ring_write(kiq_ring,
987                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
988                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
989         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
990         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
991         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
992         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
993 }
994
995 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
996                                 uint16_t pasid, uint32_t flush_type,
997                                 bool all_hub)
998 {
999         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1000         amdgpu_ring_write(kiq_ring,
1001                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1002                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1003                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1004                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1005 }
1006
1007 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1008         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1009         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1010         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1011         .kiq_query_status = gfx_v9_0_kiq_query_status,
1012         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1013         .set_resources_size = 8,
1014         .map_queues_size = 7,
1015         .unmap_queues_size = 6,
1016         .query_status_size = 7,
1017         .invalidate_tlbs_size = 2,
1018 };
1019
1020 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1021 {
1022         adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1023 }
1024
1025 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1026 {
1027         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1028         case IP_VERSION(9, 0, 1):
1029                 soc15_program_register_sequence(adev,
1030                                                 golden_settings_gc_9_0,
1031                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1032                 soc15_program_register_sequence(adev,
1033                                                 golden_settings_gc_9_0_vg10,
1034                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1035                 break;
1036         case IP_VERSION(9, 2, 1):
1037                 soc15_program_register_sequence(adev,
1038                                                 golden_settings_gc_9_2_1,
1039                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
1040                 soc15_program_register_sequence(adev,
1041                                                 golden_settings_gc_9_2_1_vg12,
1042                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1043                 break;
1044         case IP_VERSION(9, 4, 0):
1045                 soc15_program_register_sequence(adev,
1046                                                 golden_settings_gc_9_0,
1047                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1048                 soc15_program_register_sequence(adev,
1049                                                 golden_settings_gc_9_0_vg20,
1050                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1051                 break;
1052         case IP_VERSION(9, 4, 1):
1053                 soc15_program_register_sequence(adev,
1054                                                 golden_settings_gc_9_4_1_arct,
1055                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1056                 break;
1057         case IP_VERSION(9, 2, 2):
1058         case IP_VERSION(9, 1, 0):
1059                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1060                                                 ARRAY_SIZE(golden_settings_gc_9_1));
1061                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1062                         soc15_program_register_sequence(adev,
1063                                                         golden_settings_gc_9_1_rv2,
1064                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1065                 else
1066                         soc15_program_register_sequence(adev,
1067                                                         golden_settings_gc_9_1_rv1,
1068                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1069                 break;
1070         case IP_VERSION(9, 3, 0):
1071                 soc15_program_register_sequence(adev,
1072                                                 golden_settings_gc_9_1_rn,
1073                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1074                 return; /* Renoir does not need the common golden settings */
1075         case IP_VERSION(9, 4, 2):
1076                 gfx_v9_4_2_init_golden_registers(adev,
1077                                                  adev->smuio.funcs->get_die_id(adev));
1078                 break;
1079         default:
1080                 break;
1081         }
1082
1083         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1084             (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1085                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1086                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1087 }
1088
1089 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1090                                        bool wc, uint32_t reg, uint32_t val)
1091 {
1092         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1093         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1094                                 WRITE_DATA_DST_SEL(0) |
1095                                 (wc ? WR_CONFIRM : 0));
1096         amdgpu_ring_write(ring, reg);
1097         amdgpu_ring_write(ring, 0);
1098         amdgpu_ring_write(ring, val);
1099 }
1100
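/*
 * Emit a PACKET3_WAIT_REG_MEM packet that polls either a register pair
 * (@mem_space == 0) or a memory location (@mem_space == 1) until the value
 * masked by @mask equals @ref; @inv sets the poll interval. Memory addresses
 * must be dword aligned.
 */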
1101 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1102                                   int mem_space, int opt, uint32_t addr0,
1103                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1104                                   uint32_t inv)
1105 {
1106         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1107         amdgpu_ring_write(ring,
1108                                  /* memory (1) or register (0) */
1109                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1110                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1111                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1112                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1113
1114         if (mem_space)
1115                 BUG_ON(addr0 & 0x3); /* Dword align */
1116         amdgpu_ring_write(ring, addr0);
1117         amdgpu_ring_write(ring, addr1);
1118         amdgpu_ring_write(ring, ref);
1119         amdgpu_ring_write(ring, mask);
1120         amdgpu_ring_write(ring, inv); /* poll interval */
1121 }
1122
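/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the register
 * until the value appears or adev->usec_timeout microseconds elapse.
 */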
1123 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1124 {
1125         struct amdgpu_device *adev = ring->adev;
1126         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1127         uint32_t tmp = 0;
1128         unsigned i;
1129         int r;
1130
1131         WREG32(scratch, 0xCAFEDEAD);
1132         r = amdgpu_ring_alloc(ring, 3);
1133         if (r)
1134                 return r;
1135
1136         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1137         amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1138         amdgpu_ring_write(ring, 0xDEADBEEF);
1139         amdgpu_ring_commit(ring);
1140
1141         for (i = 0; i < adev->usec_timeout; i++) {
1142                 tmp = RREG32(scratch);
1143                 if (tmp == 0xDEADBEEF)
1144                         break;
1145                 udelay(1);
1146         }
1147
1148         if (i >= adev->usec_timeout)
1149                 r = -ETIMEDOUT;
1150         return r;
1151 }
1152
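/*
 * IB test: allocate a writeback slot initialized to 0xCAFEDEAD, submit a
 * 5-dword WRITE_DATA IB that stores 0xDEADBEEF to it, wait for the fence and
 * verify that the value landed in memory.
 */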
1153 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1154 {
1155         struct amdgpu_device *adev = ring->adev;
1156         struct amdgpu_ib ib;
1157         struct dma_fence *f = NULL;
1158
1159         unsigned index;
1160         uint64_t gpu_addr;
1161         uint32_t tmp;
1162         long r;
1163
1164         r = amdgpu_device_wb_get(adev, &index);
1165         if (r)
1166                 return r;
1167
1168         gpu_addr = adev->wb.gpu_addr + (index * 4);
1169         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1170         memset(&ib, 0, sizeof(ib));
1171
1172         r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1173         if (r)
1174                 goto err1;
1175
1176         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1177         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1178         ib.ptr[2] = lower_32_bits(gpu_addr);
1179         ib.ptr[3] = upper_32_bits(gpu_addr);
1180         ib.ptr[4] = 0xDEADBEEF;
1181         ib.length_dw = 5;
1182
1183         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1184         if (r)
1185                 goto err2;
1186
1187         r = dma_fence_wait_timeout(f, false, timeout);
1188         if (r == 0) {
1189                 r = -ETIMEDOUT;
1190                 goto err2;
1191         } else if (r < 0) {
1192                 goto err2;
1193         }
1194
1195         tmp = adev->wb.wb[index];
1196         if (tmp == 0xDEADBEEF)
1197                 r = 0;
1198         else
1199                 r = -EINVAL;
1200
1201 err2:
1202         amdgpu_ib_free(adev, &ib, NULL);
1203         dma_fence_put(f);
1204 err1:
1205         amdgpu_device_wb_free(adev, index);
1206         return r;
1207 }
1208
1210 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1211 {
1212         amdgpu_ucode_release(&adev->gfx.pfp_fw);
1213         amdgpu_ucode_release(&adev->gfx.me_fw);
1214         amdgpu_ucode_release(&adev->gfx.ce_fw);
1215         amdgpu_ucode_release(&adev->gfx.rlc_fw);
1216         amdgpu_ucode_release(&adev->gfx.mec_fw);
1217         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1218
1219         kfree(adev->gfx.rlc.register_list_format);
1220 }
1221
1222 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1223 {
1224         adev->gfx.me_fw_write_wait = false;
1225         adev->gfx.mec_fw_write_wait = false;
1226
1227         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1228             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1229              (adev->gfx.mec_feature_version < 46) ||
1230              (adev->gfx.pfp_fw_version < 0x000000b7) ||
1231              (adev->gfx.pfp_feature_version < 46)))
1232                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1233
1234         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1235         case IP_VERSION(9, 0, 1):
1236                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1237                     (adev->gfx.me_feature_version >= 42) &&
1238                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1239                     (adev->gfx.pfp_feature_version >= 42))
1240                         adev->gfx.me_fw_write_wait = true;
1241
1242                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1243                     (adev->gfx.mec_feature_version >= 42))
1244                         adev->gfx.mec_fw_write_wait = true;
1245                 break;
1246         case IP_VERSION(9, 2, 1):
1247                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1248                     (adev->gfx.me_feature_version >= 44) &&
1249                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1250                     (adev->gfx.pfp_feature_version >= 44))
1251                         adev->gfx.me_fw_write_wait = true;
1252
1253                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1254                     (adev->gfx.mec_feature_version >= 44))
1255                         adev->gfx.mec_fw_write_wait = true;
1256                 break;
1257         case IP_VERSION(9, 4, 0):
1258                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1259                     (adev->gfx.me_feature_version >= 44) &&
1260                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1261                     (adev->gfx.pfp_feature_version >= 44))
1262                         adev->gfx.me_fw_write_wait = true;
1263
1264                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1265                     (adev->gfx.mec_feature_version >= 44))
1266                         adev->gfx.mec_fw_write_wait = true;
1267                 break;
1268         case IP_VERSION(9, 1, 0):
1269         case IP_VERSION(9, 2, 2):
1270                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1271                     (adev->gfx.me_feature_version >= 42) &&
1272                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1273                     (adev->gfx.pfp_feature_version >= 42))
1274                         adev->gfx.me_fw_write_wait = true;
1275
1276                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1277                     (adev->gfx.mec_feature_version >= 42))
1278                         adev->gfx.mec_fw_write_wait = true;
1279                 break;
1280         default:
1281                 adev->gfx.me_fw_write_wait = true;
1282                 adev->gfx.mec_fw_write_wait = true;
1283                 break;
1284         }
1285 }
1286
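/*
 * Boards on which GFXOFF is known to be unstable, matched by PCI vendor,
 * device, subsystem IDs and revision. gfx_v9_0_should_disable_gfxoff() walks
 * this list, and GFXOFF is masked out of pp_feature for matching parts.
 */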
1287 struct amdgpu_gfxoff_quirk {
1288         u16 chip_vendor;
1289         u16 chip_device;
1290         u16 subsys_vendor;
1291         u16 subsys_device;
1292         u8 revision;
1293 };
1294
1295 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1296         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1297         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1298         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1299         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1300         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1301         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1302         /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1303         { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1304         { 0, 0, 0, 0, 0 },
1305 };
1306
1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1308 {
1309         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1310
1311         while (p && p->chip_device != 0) {
1312                 if (pdev->vendor == p->chip_vendor &&
1313                     pdev->device == p->chip_device &&
1314                     pdev->subsystem_vendor == p->subsys_vendor &&
1315                     pdev->subsystem_device == p->subsys_device &&
1316                     pdev->revision == p->revision) {
1317                         return true;
1318                 }
1319                 ++p;
1320         }
1321         return false;
1322 }
1323
1324 static bool is_raven_kicker(struct amdgpu_device *adev)
1325 {
1326         if (adev->pm.fw_version >= 0x41e2b)
1327                 return true;
1328         else
1329                 return false;
1330 }
1331
1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1333 {
1334         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1335             (adev->gfx.me_fw_version >= 0x000000a5) &&
1336             (adev->gfx.me_feature_version >= 52))
1337                 return true;
1338         else
1339                 return false;
1340 }
1341
1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1343 {
1344         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1345                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1346
1347         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1348         case IP_VERSION(9, 0, 1):
1349         case IP_VERSION(9, 2, 1):
1350         case IP_VERSION(9, 4, 0):
1351                 break;
1352         case IP_VERSION(9, 2, 2):
1353         case IP_VERSION(9, 1, 0):
1354                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1355                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1356                     ((!is_raven_kicker(adev) &&
1357                       adev->gfx.rlc_fw_version < 531) ||
1358                      (adev->gfx.rlc_feature_version < 1) ||
1359                      !adev->gfx.rlc.is_rlc_v2_1))
1360                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1361
1362                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1363                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1364                                 AMD_PG_SUPPORT_CP |
1365                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1366                 break;
1367         case IP_VERSION(9, 3, 0):
1368                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1369                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1370                                 AMD_PG_SUPPORT_CP |
1371                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1372                 break;
1373         default:
1374                 break;
1375         }
1376 }
1377
1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1379                                           char *chip_name)
1380 {
1381         int err;
1382
1383         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1384                                    "amdgpu/%s_pfp.bin", chip_name);
1385         if (err)
1386                 goto out;
1387         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1388
1389         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1390                                    "amdgpu/%s_me.bin", chip_name);
1391         if (err)
1392                 goto out;
1393         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1394
1395         err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1396                                    "amdgpu/%s_ce.bin", chip_name);
1397         if (err)
1398                 goto out;
1399         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1400
1401 out:
1402         if (err) {
1403                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1404                 amdgpu_ucode_release(&adev->gfx.me_fw);
1405                 amdgpu_ucode_release(&adev->gfx.ce_fw);
1406         }
1407         return err;
1408 }
1409
1410 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1411                                        char *chip_name)
1412 {
1413         int err;
1414         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1415         uint16_t version_major;
1416         uint16_t version_minor;
1417         uint32_t smu_version;
1418
1419         /*
1420          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1421          * instead of picasso_rlc.bin.
1422          * Judgment method:
1423          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1424          *          or revision >= 0xD8 && revision <= 0xDF
1425          * otherwise is PCO FP5
1426          */
1427         if (!strcmp(chip_name, "picasso") &&
1428                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1429                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1430                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1431                                            "amdgpu/%s_rlc_am4.bin", chip_name);
1432         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1433                 (smu_version >= 0x41e2b))
1434                 /*
1435                  * SMC is loaded by SBIOS on APUs, so the SMU version can be queried directly.
1436                  */
1437                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1438                                            "amdgpu/%s_kicker_rlc.bin", chip_name);
1439         else
1440                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1441                                            "amdgpu/%s_rlc.bin", chip_name);
1442         if (err)
1443                 goto out;
1444
1445         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1446         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1447         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1448         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1449 out:
1450         if (err)
1451                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1452
1453         return err;
1454 }
1455
1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1457 {
1458         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1459             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1460             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1461                 return false;
1462
1463         return true;
1464 }
1465
1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1467                                               char *chip_name)
1468 {
1469         int err;
1470
1471         if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1472                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1473                                            "amdgpu/%s_sjt_mec.bin", chip_name);
1474         else
1475                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1476                                            "amdgpu/%s_mec.bin", chip_name);
1477         if (err)
1478                 goto out;
1479
1480         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1481         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1482
1483         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1484                 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1485                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1486                                                    "amdgpu/%s_sjt_mec2.bin", chip_name);
1487                 else
1488                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1489                                                    "amdgpu/%s_mec2.bin", chip_name);
1490                 if (!err) {
1491                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1492                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1493                 } else {
1494                         err = 0;
1495                         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1496                 }
1497         } else {
1498                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1499                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1500         }
1501
1502         gfx_v9_0_check_if_need_gfxoff(adev);
1503         gfx_v9_0_check_fw_write_wait(adev);
1504
1505 out:
1506         if (err)
1507                 amdgpu_ucode_release(&adev->gfx.mec_fw);
1508         return err;
1509 }
1510
1511 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1512 {
1513         char ucode_prefix[30];
1514         int r;
1515
1516         DRM_DEBUG("\n");
1517         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1518
1519         /* No CPG in Arcturus */
1520         if (adev->gfx.num_gfx_rings) {
1521                 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1522                 if (r)
1523                         return r;
1524         }
1525
1526         r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1527         if (r)
1528                 return r;
1529
1530         r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1531         if (r)
1532                 return r;
1533
1534         return 0;
1535 }
1536
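/*
 * Size of the clear state buffer in dwords: begin clear state (2) + context
 * control (3) + 2 + reg_count dwords per SECT_CONTEXT extent + end clear
 * state (2) + CLEAR_STATE (2).
 */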
1537 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1538 {
1539         u32 count = 0;
1540         const struct cs_section_def *sect = NULL;
1541         const struct cs_extent_def *ext = NULL;
1542
1543         /* begin clear state */
1544         count += 2;
1545         /* context control state */
1546         count += 3;
1547
1548         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1549                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1550                         if (sect->id == SECT_CONTEXT)
1551                                 count += 2 + ext->reg_count;
1552                         else
1553                                 return 0;
1554                 }
1555         }
1556
1557         /* end clear state */
1558         count += 2;
1559         /* clear state */
1560         count += 2;
1561
1562         return count;
1563 }
1564
1565 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1566                                     volatile u32 *buffer)
1567 {
1568         u32 count = 0, i;
1569         const struct cs_section_def *sect = NULL;
1570         const struct cs_extent_def *ext = NULL;
1571
1572         if (adev->gfx.rlc.cs_data == NULL)
1573                 return;
1574         if (buffer == NULL)
1575                 return;
1576
1577         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1578         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1579
1580         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1581         buffer[count++] = cpu_to_le32(0x80000000);
1582         buffer[count++] = cpu_to_le32(0x80000000);
1583
1584         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1585                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1586                         if (sect->id == SECT_CONTEXT) {
1587                                 buffer[count++] =
1588                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1589                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1590                                                 PACKET3_SET_CONTEXT_REG_START);
1591                                 for (i = 0; i < ext->reg_count; i++)
1592                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1593                         } else {
1594                                 return;
1595                         }
1596                 }
1597         }
1598
1599         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1600         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1601
1602         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1603         buffer[count++] = cpu_to_le32(0);
1604 }
1605
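/*
 * Program the RLC always-on CU masks per SE/SH: the first always_on_cu_num
 * active CUs (4 on APUs, 8 on GC 9.2.1, 12 otherwise) are kept on for load
 * balancing, and the first pg_always_on_cu_num (2) CUs are also written to
 * RLC_PG_ALWAYS_ON_CU_MASK for powergating.
 */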
1606 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1607 {
1608         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1609         uint32_t pg_always_on_cu_num = 2;
1610         uint32_t always_on_cu_num;
1611         uint32_t i, j, k;
1612         uint32_t mask, cu_bitmap, counter;
1613
1614         if (adev->flags & AMD_IS_APU)
1615                 always_on_cu_num = 4;
1616         else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1617                 always_on_cu_num = 8;
1618         else
1619                 always_on_cu_num = 12;
1620
1621         mutex_lock(&adev->grbm_idx_mutex);
1622         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1623                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1624                         mask = 1;
1625                         cu_bitmap = 0;
1626                         counter = 0;
1627                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1628
1629                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1630                                 if (cu_info->bitmap[0][i][j] & mask) {
1631                                         if (counter == pg_always_on_cu_num)
1632                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1633                                         if (counter < always_on_cu_num)
1634                                                 cu_bitmap |= mask;
1635                                         else
1636                                                 break;
1637                                         counter++;
1638                                 }
1639                                 mask <<= 1;
1640                         }
1641
1642                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1643                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1644                 }
1645         }
1646         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1647         mutex_unlock(&adev->grbm_idx_mutex);
1648 }
1649
1650 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1651 {
1652         uint32_t data;
1653
1654         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1655         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1656         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1657         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1658         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1659
1660         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1661         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1662
1663         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1664         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1665
1666         mutex_lock(&adev->grbm_idx_mutex);
1667         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1668         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1669         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1670
1671         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1672         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1673         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1674         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1675         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1676
1677         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1678         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1679         data &= 0x0000FFFF;
1680         data |= 0x00C00000;
1681         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1682
1683         /*
1684          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1685          * programmed in gfx_v9_0_init_always_on_cu_mask()
1686          */
1687
1688         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1689          * but is used here for RLC_LB_CNTL configuration */
1690         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1691         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1692         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1693         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1694         mutex_unlock(&adev->grbm_idx_mutex);
1695
1696         gfx_v9_0_init_always_on_cu_mask(adev);
1697 }
1698
1699 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1700 {
1701         uint32_t data;
1702
1703         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1704         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1705         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1706         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1707         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1708
1709         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1710         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1711
1712         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1713         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1714
1715         mutex_lock(&adev->grbm_idx_mutex);
1716         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1717         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1718         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1719
1720         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1721         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1722         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1723         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1724         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1725
1726         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1727         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1728         data &= 0x0000FFFF;
1729         data |= 0x00C00000;
1730         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1731
1732         /*
1733          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1734          * programmed in gfx_v9_0_init_always_on_cu_mask()
1735          */
1736
1737         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1738          * but is used here for RLC_LB_CNTL configuration */
1739         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1740         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1741         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1742         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1743         mutex_unlock(&adev->grbm_idx_mutex);
1744
1745         gfx_v9_0_init_always_on_cu_mask(adev);
1746 }
1747
1748 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1749 {
1750         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1751 }
1752
1753 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1754 {
1755         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1756                 return 5;
1757         else
1758                 return 4;
1759 }
1760
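/*
 * Record the scratch register, GRBM_GFX_CNTL/GRBM_GFX_INDEX and RLC spare
 * interrupt offsets used for RLCG register access, and mark RLCG access as
 * supported.
 */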
1761 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1762 {
1763         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1764
1765         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1766         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1767         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1768         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1769         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1770         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1771         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1772         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1773         adev->gfx.rlc.rlcg_reg_access_supported = true;
1774 }
1775
1776 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1777 {
1778         const struct cs_section_def *cs_data;
1779         int r;
1780
1781         adev->gfx.rlc.cs_data = gfx9_cs_data;
1782
1783         cs_data = adev->gfx.rlc.cs_data;
1784
1785         if (cs_data) {
1786                 /* init clear state block */
1787                 r = amdgpu_gfx_rlc_init_csb(adev);
1788                 if (r)
1789                         return r;
1790         }
1791
1792         if (adev->flags & AMD_IS_APU) {
1793                 /* TODO: double check the cp_table_size for RV */
1794                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1795                 r = amdgpu_gfx_rlc_init_cpt(adev);
1796                 if (r)
1797                         return r;
1798         }
1799
1800         return 0;
1801 }
1802
1803 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1804 {
1805         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1806         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1807 }
1808
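/*
 * Allocate the MEC HPD EOP buffer (GFX9_MEC_HPD_SIZE bytes per compute ring)
 * and a GTT buffer holding a copy of the MEC firmware image for the CP to
 * fetch from.
 */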
1809 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1810 {
1811         int r;
1812         u32 *hpd;
1813         const __le32 *fw_data;
1814         unsigned fw_size;
1815         u32 *fw;
1816         size_t mec_hpd_size;
1817
1818         const struct gfx_firmware_header_v1_0 *mec_hdr;
1819
1820         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1821
1822         /* take ownership of the relevant compute queues */
1823         amdgpu_gfx_compute_queue_acquire(adev);
1824         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1825         if (mec_hpd_size) {
1826                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1827                                               AMDGPU_GEM_DOMAIN_VRAM |
1828                                               AMDGPU_GEM_DOMAIN_GTT,
1829                                               &adev->gfx.mec.hpd_eop_obj,
1830                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1831                                               (void **)&hpd);
1832                 if (r) {
1833                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1834                         gfx_v9_0_mec_fini(adev);
1835                         return r;
1836                 }
1837
1838                 memset(hpd, 0, mec_hpd_size);
1839
1840                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1841                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1842         }
1843
1844         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1845
1846         fw_data = (const __le32 *)
1847                 (adev->gfx.mec_fw->data +
1848                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1849         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1850
1851         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1852                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1853                                       &adev->gfx.mec.mec_fw_obj,
1854                                       &adev->gfx.mec.mec_fw_gpu_addr,
1855                                       (void **)&fw);
1856         if (r) {
1857                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1858                 gfx_v9_0_mec_fini(adev);
1859                 return r;
1860         }
1861
1862         memcpy(fw, fw_data, fw_size);
1863
1864         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1865         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1866
1867         return 0;
1868 }
1869
1870 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1871 {
1872         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1873                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1874                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1875                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1876                 (SQ_IND_INDEX__FORCE_READ_MASK));
1877         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1878 }
1879
1880 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1881                            uint32_t wave, uint32_t thread,
1882                            uint32_t regno, uint32_t num, uint32_t *out)
1883 {
1884         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1885                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1886                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1887                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1888                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1889                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1890                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1891         while (num--)
1892                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1893 }
1894
1895 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1896 {
1897         /* type 1 wave data */
1898         dst[(*no_fields)++] = 1;
1899         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1900         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1901         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1902         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1903         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1904         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1905         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1906         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1907         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1908         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1909         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1910         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1911         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1912         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1913         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1914 }
1915
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1917                                      uint32_t wave, uint32_t start,
1918                                      uint32_t size, uint32_t *dst)
1919 {
1920         wave_read_regs(
1921                 adev, simd, wave, 0,
1922                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1923 }
1924
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1926                                      uint32_t wave, uint32_t thread,
1927                                      uint32_t start, uint32_t size,
1928                                      uint32_t *dst)
1929 {
1930         wave_read_regs(
1931                 adev, simd, wave, thread,
1932                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1933 }
1934
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936                                   u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1937 {
1938         soc15_grbm_select(adev, me, pipe, q, vm, 0);
1939 }
1940
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943         .select_se_sh = &gfx_v9_0_select_se_sh,
1944         .read_wave_data = &gfx_v9_0_read_wave_data,
1945         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1948 };
1949
1950 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1951                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1952                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1953                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1954 };
1955
1956 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1957         .ras_block = {
1958                 .hw_ops = &gfx_v9_0_ras_ops,
1959         },
1960 };
1961
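/*
 * Per-ASIC GFX configuration: FIFO sizes and GB_ADDR_CONFIG are either taken
 * from golden values or read back from the register and patched, then the
 * individual gb_addr_config fields (pipes, banks, SEs, ...) are decoded below.
 */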
1962 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1963 {
1964         u32 gb_addr_config;
1965         int err;
1966
1967         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1968         case IP_VERSION(9, 0, 1):
1969                 adev->gfx.config.max_hw_contexts = 8;
1970                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1975                 break;
1976         case IP_VERSION(9, 2, 1):
1977                 adev->gfx.config.max_hw_contexts = 8;
1978                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1979                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1980                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1981                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1982                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1983                 DRM_INFO("fix gfx.config for vega12\n");
1984                 break;
1985         case IP_VERSION(9, 4, 0):
1986                 adev->gfx.ras = &gfx_v9_0_ras;
1987                 adev->gfx.config.max_hw_contexts = 8;
1988                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1992                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1993                 gb_addr_config &= ~0xf3e777ff;
1994                 gb_addr_config |= 0x22014042;
1995                 /* check vbios table if gpu info is not available */
1996                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1997                 if (err)
1998                         return err;
1999                 break;
2000         case IP_VERSION(9, 2, 2):
2001         case IP_VERSION(9, 1, 0):
2002                 adev->gfx.config.max_hw_contexts = 8;
2003                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2004                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2005                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2006                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2007                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2008                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2009                 else
2010                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2011                 break;
2012         case IP_VERSION(9, 4, 1):
2013                 adev->gfx.ras = &gfx_v9_4_ras;
2014                 adev->gfx.config.max_hw_contexts = 8;
2015                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2016                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2017                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2018                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2019                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2020                 gb_addr_config &= ~0xf3e777ff;
2021                 gb_addr_config |= 0x22014042;
2022                 break;
2023         case IP_VERSION(9, 3, 0):
2024                 adev->gfx.config.max_hw_contexts = 8;
2025                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2026                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2027                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2028                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2029                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2030                 gb_addr_config &= ~0xf3e777ff;
2031                 gb_addr_config |= 0x22010042;
2032                 break;
2033         case IP_VERSION(9, 4, 2):
2034                 adev->gfx.ras = &gfx_v9_4_2_ras;
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041                 gb_addr_config &= ~0xf3e777ff;
2042                 gb_addr_config |= 0x22014042;
2043                 /* check vbios table if gpu info is not available */
2044                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2045                 if (err)
2046                         return err;
2047                 break;
2048         default:
2049                 BUG();
2050                 break;
2051         }
2052
2053         adev->gfx.config.gb_addr_config = gb_addr_config;
2054
2055         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2056                         REG_GET_FIELD(
2057                                         adev->gfx.config.gb_addr_config,
2058                                         GB_ADDR_CONFIG,
2059                                         NUM_PIPES);
2060
2061         adev->gfx.config.max_tile_pipes =
2062                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2063
2064         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2065                         REG_GET_FIELD(
2066                                         adev->gfx.config.gb_addr_config,
2067                                         GB_ADDR_CONFIG,
2068                                         NUM_BANKS);
2069         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2070                         REG_GET_FIELD(
2071                                         adev->gfx.config.gb_addr_config,
2072                                         GB_ADDR_CONFIG,
2073                                         MAX_COMPRESSED_FRAGS);
2074         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2075                         REG_GET_FIELD(
2076                                         adev->gfx.config.gb_addr_config,
2077                                         GB_ADDR_CONFIG,
2078                                         NUM_RB_PER_SE);
2079         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2080                         REG_GET_FIELD(
2081                                         adev->gfx.config.gb_addr_config,
2082                                         GB_ADDR_CONFIG,
2083                                         NUM_SHADER_ENGINES);
2084         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2085                         REG_GET_FIELD(
2086                                         adev->gfx.config.gb_addr_config,
2087                                         GB_ADDR_CONFIG,
2088                                         PIPE_INTERLEAVE_SIZE));
2089
2090         return 0;
2091 }
2092
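/*
 * Initialize one compute ring: MEC0 is exposed as ME1, the doorbell index is
 * derived from ring_id, the EOP buffer slice comes from the MEC HPD buffer,
 * and the EOP interrupt source is selected per ME/pipe.
 */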
2093 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2094                                       int mec, int pipe, int queue)
2095 {
2096         unsigned irq_type;
2097         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2098         unsigned int hw_prio;
2099
2102         /* mec0 is me1 */
2103         ring->me = mec + 1;
2104         ring->pipe = pipe;
2105         ring->queue = queue;
2106
2107         ring->ring_obj = NULL;
2108         ring->use_doorbell = true;
2109         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2110         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2111                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2112         ring->vm_hub = AMDGPU_GFXHUB(0);
2113         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2114
2115         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2116                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2117                 + ring->pipe;
2118         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2119                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2120         /* type-2 packets are deprecated on MEC, use type-3 instead */
2121         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2122                                 hw_prio, NULL);
2123 }
2124
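/*
 * Allocate storage for the GFX IP register dump: one array for the core
 * register list and one sized for every compute queue instance
 * (num_mec * num_pipe_per_mec * num_queue_per_pipe).
 */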
2125 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2126 {
2127         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2128         uint32_t *ptr;
2129         uint32_t inst;
2130
2131         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2132         if (ptr == NULL) {
2133                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2134                 adev->gfx.ip_dump_core = NULL;
2135         } else {
2136                 adev->gfx.ip_dump_core = ptr;
2137         }
2138
2139         /* Allocate memory for compute queue registers for all the instances */
2140         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2141         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2142                 adev->gfx.mec.num_queue_per_pipe;
2143
2144         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2145         if (ptr == NULL) {
2146                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2147                 adev->gfx.ip_dump_compute_queues = NULL;
2148         } else {
2149                 adev->gfx.ip_dump_compute_queues = ptr;
2150         }
2151 }
2152
2153 static int gfx_v9_0_sw_init(void *handle)
2154 {
2155         int i, j, k, r, ring_id;
2156         int xcc_id = 0;
2157         struct amdgpu_ring *ring;
2158         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2159         unsigned int hw_prio;
2160
2161         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2162         case IP_VERSION(9, 0, 1):
2163         case IP_VERSION(9, 2, 1):
2164         case IP_VERSION(9, 4, 0):
2165         case IP_VERSION(9, 2, 2):
2166         case IP_VERSION(9, 1, 0):
2167         case IP_VERSION(9, 4, 1):
2168         case IP_VERSION(9, 3, 0):
2169         case IP_VERSION(9, 4, 2):
2170                 adev->gfx.mec.num_mec = 2;
2171                 break;
2172         default:
2173                 adev->gfx.mec.num_mec = 1;
2174                 break;
2175         }
2176
2177         adev->gfx.mec.num_pipe_per_mec = 4;
2178         adev->gfx.mec.num_queue_per_pipe = 8;
2179
2180         /* EOP Event */
2181         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2182         if (r)
2183                 return r;
2184
2185         /* Privileged reg */
2186         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2187                               &adev->gfx.priv_reg_irq);
2188         if (r)
2189                 return r;
2190
2191         /* Privileged inst */
2192         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2193                               &adev->gfx.priv_inst_irq);
2194         if (r)
2195                 return r;
2196
2197         /* ECC error */
2198         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2199                               &adev->gfx.cp_ecc_error_irq);
2200         if (r)
2201                 return r;
2202
2203         /* FUE error */
2204         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2205                               &adev->gfx.cp_ecc_error_irq);
2206         if (r)
2207                 return r;
2208
2209         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2210
2211         if (adev->gfx.rlc.funcs) {
2212                 if (adev->gfx.rlc.funcs->init) {
2213                         r = adev->gfx.rlc.funcs->init(adev);
2214                         if (r) {
2215                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2216                                 return r;
2217                         }
2218                 }
2219         }
2220
2221         r = gfx_v9_0_mec_init(adev);
2222         if (r) {
2223                 DRM_ERROR("Failed to init MEC BOs!\n");
2224                 return r;
2225         }
2226
2227         /* set up the gfx ring */
2228         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2229                 ring = &adev->gfx.gfx_ring[i];
2230                 ring->ring_obj = NULL;
2231                 if (!i)
2232                         sprintf(ring->name, "gfx");
2233                 else
2234                         sprintf(ring->name, "gfx_%d", i);
2235                 ring->use_doorbell = true;
2236                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2237
2238                 /* disable scheduler on the real ring */
2239                 ring->no_scheduler = adev->gfx.mcbp;
2240                 ring->vm_hub = AMDGPU_GFXHUB(0);
2241                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2242                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2243                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2244                 if (r)
2245                         return r;
2246         }
2247
2248         /* set up the software rings */
2249         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2250                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2251                         ring = &adev->gfx.sw_gfx_ring[i];
2252                         ring->ring_obj = NULL;
2253                         sprintf(ring->name, amdgpu_sw_ring_name(i));
2254                         ring->use_doorbell = true;
2255                         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2256                         ring->is_sw_ring = true;
2257                         hw_prio = amdgpu_sw_ring_priority(i);
2258                         ring->vm_hub = AMDGPU_GFXHUB(0);
2259                         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2260                                              AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2261                                              NULL);
2262                         if (r)
2263                                 return r;
2264                         ring->wptr = 0;
2265                 }
2266
2267                 /* init the muxer and add software rings */
2268                 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2269                                          GFX9_NUM_SW_GFX_RINGS);
2270                 if (r) {
2271                         DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2272                         return r;
2273                 }
2274                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2275                         r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2276                                                         &adev->gfx.sw_gfx_ring[i]);
2277                         if (r) {
2278                                 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2279                                 return r;
2280                         }
2281                 }
2282         }
2283
2284         /* set up the compute queues - allocate horizontally across pipes */
2285         ring_id = 0;
2286         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2287                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2288                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2289                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2290                                                                      k, j))
2291                                         continue;
2292
2293                                 r = gfx_v9_0_compute_ring_init(adev,
2294                                                                ring_id,
2295                                                                i, k, j);
2296                                 if (r)
2297                                         return r;
2298
2299                                 ring_id++;
2300                         }
2301                 }
2302         }
2303
2304         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2305         if (r) {
2306                 DRM_ERROR("Failed to init KIQ BOs!\n");
2307                 return r;
2308         }
2309
2310         r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2311         if (r)
2312                 return r;
2313
2314         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2315         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2316         if (r)
2317                 return r;
2318
2319         adev->gfx.ce_ram_size = 0x8000;
2320
2321         r = gfx_v9_0_gpu_early_init(adev);
2322         if (r)
2323                 return r;
2324
2325         if (amdgpu_gfx_ras_sw_init(adev)) {
2326                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2327                 return -EINVAL;
2328         }
2329
2330         gfx_v9_0_alloc_ip_dump(adev);
2331
2332         return 0;
2333 }
2334
2335
2336 static int gfx_v9_0_sw_fini(void *handle)
2337 {
2338         int i;
2339         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2340
2341         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2342                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2343                         amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2344                 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2345         }
2346
2347         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2348                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2349         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2350                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2351
2352         amdgpu_gfx_mqd_sw_fini(adev, 0);
2353         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2354         amdgpu_gfx_kiq_fini(adev, 0);
2355
2356         gfx_v9_0_mec_fini(adev);
2357         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2358                                 &adev->gfx.rlc.clear_state_gpu_addr,
2359                                 (void **)&adev->gfx.rlc.cs_ptr);
2360         if (adev->flags & AMD_IS_APU) {
2361                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2362                                 &adev->gfx.rlc.cp_table_gpu_addr,
2363                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2364         }
2365         gfx_v9_0_free_microcode(adev);
2366
2367         kfree(adev->gfx.ip_dump_core);
2368         kfree(adev->gfx.ip_dump_compute_queues);
2369
2370         return 0;
2371 }
2372
2373
2374 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2375 {
2376         /* TODO */
2377 }
2378
2379 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2380                            u32 instance, int xcc_id)
2381 {
2382         u32 data;
2383
2384         if (instance == 0xffffffff)
2385                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2386         else
2387                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2388
2389         if (se_num == 0xffffffff)
2390                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2391         else
2392                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2393
2394         if (sh_num == 0xffffffff)
2395                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2396         else
2397                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2398
2399         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2400 }
2401
2402 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2403 {
2404         u32 data, mask;
2405
2406         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2407         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2408
2409         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2410         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2411
2412         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2413                                          adev->gfx.config.max_sh_per_se);
2414
2415         return (~data) & mask;
2416 }
2417
2418 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2419 {
2420         int i, j;
2421         u32 data;
2422         u32 active_rbs = 0;
2423         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2424                                         adev->gfx.config.max_sh_per_se;
2425
2426         mutex_lock(&adev->grbm_idx_mutex);
2427         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2428                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2429                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2430                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2431                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2432                                                rb_bitmap_width_per_sh);
2433                 }
2434         }
2435         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2436         mutex_unlock(&adev->grbm_idx_mutex);
2437
2438         adev->gfx.config.backend_enable_mask = active_rbs;
2439         adev->gfx.config.num_rbs = hweight32(active_rbs);
2440 }
2441
2442 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2443                                 uint32_t first_vmid,
2444                                 uint32_t last_vmid)
2445 {
2446         uint32_t data;
2447         uint32_t trap_config_vmid_mask = 0;
2448         int i;
2449
2450         /* Calculate trap config vmid mask */
2451         for (i = first_vmid; i < last_vmid; i++)
2452                 trap_config_vmid_mask |= (1 << i);
2453
2454         data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2455                         VMID_SEL, trap_config_vmid_mask);
2456         data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2457                         TRAP_EN, 1);
2458         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2459         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2460
2461         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2462         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2463 }
2464
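/*
 * Note on the aperture encoding used below: SH_MEM_BASES packs two
 * 16-bit fields (PRIVATE_BASE in the low half and SHARED_BASE in the
 * high half, going by the REG_SET_FIELD usage in
 * gfx_v9_0_constants_init), each holding bits 63:48 of an aperture
 * base.  With DEFAULT_SH_MEM_BASES = 0x6000 the value written is
 * 0x6000 | (0x6000 << 16) = 0x60006000, i.e. both apertures start at
 * 0x6000'0000'00000000, matching the LDS/scratch/GPUVM ranges listed
 * in the comment inside the function.
 */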
2465 #define DEFAULT_SH_MEM_BASES    (0x6000)
2466 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2467 {
2468         int i;
2469         uint32_t sh_mem_config;
2470         uint32_t sh_mem_bases;
2471
2472         /*
2473          * Configure apertures:
2474          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2475          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2476          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2477          */
2478         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2479
2480         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2481                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2482                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2483
2484         mutex_lock(&adev->srbm_mutex);
2485         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2486                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2487                 /* CP and shaders */
2488                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2489                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2490         }
2491         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2492         mutex_unlock(&adev->srbm_mutex);
2493
2494         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2495          * access. These should be enabled by FW for target VMIDs. */
2496         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2497                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2498                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2499                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2500                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2501         }
2502 }
2503
2504 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2505 {
2506         int vmid;
2507
2508         /*
2509          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2510          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2511          * the driver can enable them for graphics. VMID0 should maintain
2512          * access so that HWS firmware can save/restore entries.
2513          */
2514         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2515                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2516                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2517                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2518                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2519         }
2520 }
2521
2522 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2523 {
2524         uint32_t tmp;
2525
2526         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2527         case IP_VERSION(9, 4, 1):
2528                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2529                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2530                                 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2531                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2532                 break;
2533         default:
2534                 break;
2535         }
2536 }
2537
2538 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2539 {
2540         u32 tmp;
2541         int i;
2542
2543         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2544
2545         gfx_v9_0_tiling_mode_table_init(adev);
2546
2547         if (adev->gfx.num_gfx_rings)
2548                 gfx_v9_0_setup_rb(adev);
2549         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2550         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2551
2552         /* XXX SH_MEM regs */
2553         /* where to put LDS, scratch, GPUVM in FSA64 space */
2554         mutex_lock(&adev->srbm_mutex);
2555         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2556                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2557                 /* CP and shaders */
2558                 if (i == 0) {
2559                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2560                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2561                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2562                                             !!adev->gmc.noretry);
2563                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2564                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2565                 } else {
2566                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2567                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2568                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2569                                             !!adev->gmc.noretry);
2570                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2571                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2572                                 (adev->gmc.private_aperture_start >> 48));
2573                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2574                                 (adev->gmc.shared_aperture_start >> 48));
2575                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2576                 }
2577         }
2578         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2579
2580         mutex_unlock(&adev->srbm_mutex);
2581
2582         gfx_v9_0_init_compute_vmid(adev);
2583         gfx_v9_0_init_gds_vmid(adev);
2584         gfx_v9_0_init_sq_config(adev);
2585 }
2586
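/*
 * The helper below polls, per shader engine / shader array, until the
 * RLC serdes CU masters report idle in RLC_SERDES_CU_MASTER_BUSY (or
 * adev->usec_timeout expires), then does one more poll on the non-CU
 * master busy bits.  Selection is done under grbm_idx_mutex and
 * restored to broadcast before returning.
 */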
2587 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2588 {
2589         u32 i, j, k;
2590         u32 mask;
2591
2592         mutex_lock(&adev->grbm_idx_mutex);
2593         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2594                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2595                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2596                         for (k = 0; k < adev->usec_timeout; k++) {
2597                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2598                                         break;
2599                                 udelay(1);
2600                         }
2601                         if (k == adev->usec_timeout) {
2602                                 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2603                                                       0xffffffff, 0xffffffff, 0);
2604                                 mutex_unlock(&adev->grbm_idx_mutex);
2605                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2606                                          i, j);
2607                                 return;
2608                         }
2609                 }
2610         }
2611         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2612         mutex_unlock(&adev->grbm_idx_mutex);
2613
2614         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2615                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2616                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2617                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2618         for (k = 0; k < adev->usec_timeout; k++) {
2619                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2620                         break;
2621                 udelay(1);
2622         }
2623 }
2624
2625 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2626                                                bool enable)
2627 {
2628         u32 tmp;
2629
2630         /* These interrupts should be enabled to drive DS clock */
2631
2632         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2633
2634         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2635         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2636         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2637         if (adev->gfx.num_gfx_rings)
2638                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2639
2640         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2641 }
2642
2643 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2644 {
2645         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2646         /* csib */
2647         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2648                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2649         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2650                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2651         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2652                         adev->gfx.rlc.clear_state_size);
2653 }
2654
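/*
 * Layout assumed by the parser below (derived from how it walks the
 * list): register_list_format starts with a direct-register portion of
 * reg_list_format_direct_reg_list_length entries, followed by indirect
 * blocks whose entries are three dwords each (two data dwords followed
 * by the indirect register offset), with each block terminated by a
 * 0xFFFFFFFF marker.  The parser records the start offset of every
 * block and collects the unique indirect register offsets.
 */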
2655 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2656                                 int indirect_offset,
2657                                 int list_size,
2658                                 int *unique_indirect_regs,
2659                                 int unique_indirect_reg_count,
2660                                 int *indirect_start_offsets,
2661                                 int *indirect_start_offsets_count,
2662                                 int max_start_offsets_count)
2663 {
2664         int idx;
2665
2666         for (; indirect_offset < list_size; indirect_offset++) {
2667                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2668                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2669                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2670
2671                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2672                         indirect_offset += 2;
2673
2674                         /* look for the matching index */
2675                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2676                                 if (unique_indirect_regs[idx] ==
2677                                         register_list_format[indirect_offset] ||
2678                                         !unique_indirect_regs[idx])
2679                                         break;
2680                         }
2681
2682                         BUG_ON(idx >= unique_indirect_reg_count);
2683
2684                         if (!unique_indirect_regs[idx])
2685                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2686
2687                         indirect_offset++;
2688                 }
2689         }
2690 }
2691
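/*
 * Programming sequence sketch for the function below: parse the
 * firmware-provided register_list_format (see the parser above), turn
 * on address auto-increment in RLC_SRM_CNTL, stream the restore table
 * into SRM ARAM, write the format list, list size and per-block start
 * offsets into RLC GPM scratch, and finally program each unique
 * indirect register into the RLC_SRM_INDEX_CNTL_ADDR/DATA pairs.
 */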
2692 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2693 {
2694         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2695         int unique_indirect_reg_count = 0;
2696
2697         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2698         int indirect_start_offsets_count = 0;
2699
2700         int list_size = 0;
2701         int i = 0, j = 0;
2702         u32 tmp = 0;
2703
2704         u32 *register_list_format =
2705                 kmemdup(adev->gfx.rlc.register_list_format,
2706                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2707         if (!register_list_format)
2708                 return -ENOMEM;
2709
2710         /* setup unique_indirect_regs array and indirect_start_offsets array */
2711         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2712         gfx_v9_1_parse_ind_reg_list(register_list_format,
2713                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2714                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2715                                     unique_indirect_regs,
2716                                     unique_indirect_reg_count,
2717                                     indirect_start_offsets,
2718                                     &indirect_start_offsets_count,
2719                                     ARRAY_SIZE(indirect_start_offsets));
2720
2721         /* enable auto inc in case it is disabled */
2722         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2723         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2724         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2725
2726         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2727         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2728                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2729         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2730                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2731                         adev->gfx.rlc.register_restore[i]);
2732
2733         /* load indirect register */
2734         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2735                 adev->gfx.rlc.reg_list_format_start);
2736
2737         /* direct register portion */
2738         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2739                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2740                         register_list_format[i]);
2741
2742         /* indirect register portion */
2743         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2744                 if (register_list_format[i] == 0xFFFFFFFF) {
2745                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2746                         continue;
2747                 }
2748
2749                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2750                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2751
2752                 for (j = 0; j < unique_indirect_reg_count; j++) {
2753                         if (register_list_format[i] == unique_indirect_regs[j]) {
2754                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2755                                 break;
2756                         }
2757                 }
2758
2759                 BUG_ON(j >= unique_indirect_reg_count);
2760
2761                 i++;
2762         }
2763
2764         /* set save/restore list size */
2765         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2766         list_size = list_size >> 1;
2767         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2768                 adev->gfx.rlc.reg_restore_list_size);
2769         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2770
2771         /* write the starting offsets to RLC scratch ram */
2772         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2773                 adev->gfx.rlc.starting_offsets_start);
2774         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2775                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2776                        indirect_start_offsets[i]);
2777
2778         /* load unique indirect regs */
2779         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2780                 if (unique_indirect_regs[i] != 0) {
2781                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2782                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2783                                unique_indirect_regs[i] & 0x3FFFF);
2784
2785                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2786                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2787                                unique_indirect_regs[i] >> 20);
2788                 }
2789         }
2790
2791         kfree(register_list_format);
2792         return 0;
2793 }
2794
2795 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2796 {
2797         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2798 }
2799
2800 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2801                                              bool enable)
2802 {
2803         uint32_t data = 0;
2804         uint32_t default_data = 0;
2805
2806         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2807         if (enable) {
2808                 /* enable GFXIP control over CGPG */
2809                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2810                 if (default_data != data)
2811                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2812
2813                 /* update status */
2814                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2815                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2816                 if (default_data != data)
2817                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2818         } else {
2819                 /* restore GFXIP control over CGPG */
2820                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2821                 if (default_data != data)
2822                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2823         }
2824 }
2825
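/*
 * The function below only runs when one of the GFX power-gating flags
 * (PG/SMG/DMG) is set: it programs the idle poll count, the RLC
 * power-up/down, command-propagate, memory-sleep and serdes delays,
 * and the GRBM register-save idle threshold.  CGPG control is then
 * handed to GFXIP via pwr_10_0_gfxip_control_over_cgpg() above on
 * everything except GC 9.3.0.
 */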
2826 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2827 {
2828         uint32_t data = 0;
2829
2830         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831                               AMD_PG_SUPPORT_GFX_SMG |
2832                               AMD_PG_SUPPORT_GFX_DMG)) {
2833                 /* init IDLE_POLL_COUNT = 60 */
2834                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2835                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2836                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2837                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2838
2839                 /* init RLC PG Delay */
2840                 data = 0;
2841                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2842                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2843                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2844                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2845                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2846
2847                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2848                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2849                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2850                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2851
2852                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2853                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2854                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2855                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2856
2857                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2858                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2859
2860                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2861                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2862                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2863                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2864                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2865         }
2866 }
2867
2868 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2869                                                 bool enable)
2870 {
2871         uint32_t data = 0;
2872         uint32_t default_data = 0;
2873
2874         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2875         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2876                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2877                              enable ? 1 : 0);
2878         if (default_data != data)
2879                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2880 }
2881
2882 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2883                                                 bool enable)
2884 {
2885         uint32_t data = 0;
2886         uint32_t default_data = 0;
2887
2888         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2891                              enable ? 1 : 0);
2892         if (default_data != data)
2893                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2894 }
2895
2896 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2897                                         bool enable)
2898 {
2899         uint32_t data = 0;
2900         uint32_t default_data = 0;
2901
2902         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2903         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2904                              CP_PG_DISABLE,
2905                              enable ? 0 : 1);
2906         if (default_data != data)
2907                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2908 }
2909
2910 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2911                                                 bool enable)
2912 {
2913         uint32_t data, default_data;
2914
2915         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2916         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2917                              GFX_POWER_GATING_ENABLE,
2918                              enable ? 1 : 0);
2919         if (default_data != data)
2920                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2921 }
2922
2923 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2924                                                 bool enable)
2925 {
2926         uint32_t data, default_data;
2927
2928         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2929         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2930                              GFX_PIPELINE_PG_ENABLE,
2931                              enable ? 1 : 0);
2932         if (default_data != data)
2933                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2934
2935         if (!enable)
2936                 /* read any GFX register to wake up GFX */
2937                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2938 }
2939
2940 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2941                                                        bool enable)
2942 {
2943         uint32_t data, default_data;
2944
2945         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947                              STATIC_PER_CU_PG_ENABLE,
2948                              enable ? 1 : 0);
2949         if (default_data != data)
2950                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2951 }
2952
2953 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2954                                                 bool enable)
2955 {
2956         uint32_t data, default_data;
2957
2958         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2959         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2960                              DYN_PER_CU_PG_ENABLE,
2961                              enable ? 1 : 0);
2962         if (default_data != data)
2963                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2964 }
2965
2966 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2967 {
2968         gfx_v9_0_init_csb(adev);
2969
2970         /*
2971          * The RLC save/restore list is supported since RLC v2_1,
2972          * and it's needed by the gfxoff feature.
2973          */
2974         if (adev->gfx.rlc.is_rlc_v2_1) {
2975                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2976                             IP_VERSION(9, 2, 1) ||
2977                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2978                         gfx_v9_1_init_rlc_save_restore_list(adev);
2979                 gfx_v9_0_enable_save_restore_machine(adev);
2980         }
2981
2982         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2983                               AMD_PG_SUPPORT_GFX_SMG |
2984                               AMD_PG_SUPPORT_GFX_DMG |
2985                               AMD_PG_SUPPORT_CP |
2986                               AMD_PG_SUPPORT_GDS |
2987                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2988                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2989                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
2990                 gfx_v9_0_init_gfx_power_gating(adev);
2991         }
2992 }
2993
2994 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2995 {
2996         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2997         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2998         gfx_v9_0_wait_for_rlc_serdes(adev);
2999 }
3000
3001 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3002 {
3003         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3004         udelay(50);
3005         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3006         udelay(50);
3007 }
3008
3009 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3010 {
3011 #ifdef AMDGPU_RLC_DEBUG_RETRY
3012         u32 rlc_ucode_ver;
3013 #endif
3014
3015         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3016         udelay(50);
3017
3018         /* on carrizo (APU), the cp interrupt is enabled after the cp is initialized */
3019         if (!(adev->flags & AMD_IS_APU)) {
3020                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3021                 udelay(50);
3022         }
3023
3024 #ifdef AMDGPU_RLC_DEBUG_RETRY
3025         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3026         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3027         if (rlc_ucode_ver == 0x108) {
3028                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3029                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3030                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3031                  * default is 0x9C4 to create a 100us interval */
3032                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3033                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3034                  * to disable the page fault retry interrupts, default is
3035                  * 0x100 (256) */
3036                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3037         }
3038 #endif
3039 }
3040
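/*
 * Legacy (non-PSP) RLC microcode load used below: point
 * RLC_GPM_UCODE_ADDR at the loading start address, stream the ucode
 * dwords through RLC_GPM_UCODE_DATA, then write the firmware version
 * to RLC_GPM_UCODE_ADDR.
 */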
3041 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3042 {
3043         const struct rlc_firmware_header_v2_0 *hdr;
3044         const __le32 *fw_data;
3045         unsigned i, fw_size;
3046
3047         if (!adev->gfx.rlc_fw)
3048                 return -EINVAL;
3049
3050         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3051         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3052
3053         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3054                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3055         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3056
3057         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3058                         RLCG_UCODE_LOADING_START_ADDRESS);
3059         for (i = 0; i < fw_size; i++)
3060                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3061         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3062
3063         return 0;
3064 }
3065
3066 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3067 {
3068         int r;
3069
3070         if (amdgpu_sriov_vf(adev)) {
3071                 gfx_v9_0_init_csb(adev);
3072                 return 0;
3073         }
3074
3075         adev->gfx.rlc.funcs->stop(adev);
3076
3077         /* disable CG */
3078         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3079
3080         gfx_v9_0_init_pg(adev);
3081
3082         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3083                 /* legacy rlc firmware loading */
3084                 r = gfx_v9_0_rlc_load_microcode(adev);
3085                 if (r)
3086                         return r;
3087         }
3088
3089         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3090         case IP_VERSION(9, 2, 2):
3091         case IP_VERSION(9, 1, 0):
3092                 gfx_v9_0_init_lbpw(adev);
3093                 if (amdgpu_lbpw == 0)
3094                         gfx_v9_0_enable_lbpw(adev, false);
3095                 else
3096                         gfx_v9_0_enable_lbpw(adev, true);
3097                 break;
3098         case IP_VERSION(9, 4, 0):
3099                 gfx_v9_4_init_lbpw(adev);
3100                 if (amdgpu_lbpw > 0)
3101                         gfx_v9_0_enable_lbpw(adev, true);
3102                 else
3103                         gfx_v9_0_enable_lbpw(adev, false);
3104                 break;
3105         default:
3106                 break;
3107         }
3108
3109         gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3110
3111         adev->gfx.rlc.funcs->start(adev);
3112
3113         return 0;
3114 }
3115
3116 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3117 {
3118         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3119
3120         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3121         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3122         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3123         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3124         udelay(50);
3125 }
3126
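/*
 * Legacy CP gfx microcode load: the function below halts the ME, PFP
 * and CE via gfx_v9_0_cp_gfx_enable(false), then streams the PFP, CE
 * and ME images through their UCODE_ADDR/UCODE_DATA (or
 * ME_RAM_WADDR/ME_RAM_DATA) register pairs and writes each firmware
 * version afterwards.
 */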
3127 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3128 {
3129         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3130         const struct gfx_firmware_header_v1_0 *ce_hdr;
3131         const struct gfx_firmware_header_v1_0 *me_hdr;
3132         const __le32 *fw_data;
3133         unsigned i, fw_size;
3134
3135         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3136                 return -EINVAL;
3137
3138         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3139                 adev->gfx.pfp_fw->data;
3140         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3141                 adev->gfx.ce_fw->data;
3142         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3143                 adev->gfx.me_fw->data;
3144
3145         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3146         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3147         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3148
3149         gfx_v9_0_cp_gfx_enable(adev, false);
3150
3151         /* PFP */
3152         fw_data = (const __le32 *)
3153                 (adev->gfx.pfp_fw->data +
3154                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3155         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3156         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3157         for (i = 0; i < fw_size; i++)
3158                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3159         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3160
3161         /* CE */
3162         fw_data = (const __le32 *)
3163                 (adev->gfx.ce_fw->data +
3164                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3165         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3166         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3167         for (i = 0; i < fw_size; i++)
3168                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3169         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3170
3171         /* ME */
3172         fw_data = (const __le32 *)
3173                 (adev->gfx.me_fw->data +
3174                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3175         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3176         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3177         for (i = 0; i < fw_size; i++)
3178                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3179         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3180
3181         return 0;
3182 }
3183
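/*
 * The function below submits the clear-state preamble on gfx ring 0:
 * PREAMBLE begin, a CONTEXT_CONTROL packet, the SECT_CONTEXT extents
 * from gfx9_cs_data, PREAMBLE end, CLEAR_STATE, a SET_BASE for the CE
 * partition and a VGT_INDEX_TYPE reset.  On gfx9 APUs resuming from S3
 * with suspend_complete unset, the resubmit is skipped (see the quirk
 * note inside the function).
 */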
3184 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3185 {
3186         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3187         const struct cs_section_def *sect = NULL;
3188         const struct cs_extent_def *ext = NULL;
3189         int r, i, tmp;
3190
3191         /* init the CP */
3192         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3193         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3194
3195         gfx_v9_0_cp_gfx_enable(adev, true);
3196
3197         /* Limit this quirk to the APU gfx9 series only; it has been
3198          * confirmed that the APU gfx10/gfx11 don't need this update.
3199          */
3200         if (adev->flags & AMD_IS_APU &&
3201                         adev->in_s3 && !adev->suspend_complete) {
3202                 DRM_INFO("Will skip the CSB packet resubmit\n");
3203                 return 0;
3204         }
3205         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3206         if (r) {
3207                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3208                 return r;
3209         }
3210
3211         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3212         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3213
3214         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3215         amdgpu_ring_write(ring, 0x80000000);
3216         amdgpu_ring_write(ring, 0x80000000);
3217
3218         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3219                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3220                         if (sect->id == SECT_CONTEXT) {
3221                                 amdgpu_ring_write(ring,
3222                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3223                                                ext->reg_count));
3224                                 amdgpu_ring_write(ring,
3225                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3226                                 for (i = 0; i < ext->reg_count; i++)
3227                                         amdgpu_ring_write(ring, ext->extent[i]);
3228                         }
3229                 }
3230         }
3231
3232         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3233         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3234
3235         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3236         amdgpu_ring_write(ring, 0);
3237
3238         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3239         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3240         amdgpu_ring_write(ring, 0x8000);
3241         amdgpu_ring_write(ring, 0x8000);
3242
3243         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3244         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3245                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3246         amdgpu_ring_write(ring, tmp);
3247         amdgpu_ring_write(ring, 0);
3248
3249         amdgpu_ring_commit(ring);
3250
3251         return 0;
3252 }
3253
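/*
 * Ring-buffer bring-up sketch for gfx ring 0: program the RB size and
 * block size (both derived with order_base_2), reset the write
 * pointer, set the rptr/wptr writeback addresses, program the RB base,
 * and set up the doorbell offset and range before kicking
 * gfx_v9_0_cp_gfx_start().
 */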
3254 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3255 {
3256         struct amdgpu_ring *ring;
3257         u32 tmp;
3258         u32 rb_bufsz;
3259         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3260
3261         /* Set the write pointer delay */
3262         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3263
3264         /* set the RB to use vmid 0 */
3265         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3266
3267         /* Set ring buffer size */
3268         ring = &adev->gfx.gfx_ring[0];
3269         rb_bufsz = order_base_2(ring->ring_size / 8);
3270         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3271         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3272 #ifdef __BIG_ENDIAN
3273         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3274 #endif
3275         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3276
3277         /* Initialize the ring buffer's write pointers */
3278         ring->wptr = 0;
3279         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3280         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3281
3282         /* set the wb address whether it's enabled or not */
3283         rptr_addr = ring->rptr_gpu_addr;
3284         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3285         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3286
3287         wptr_gpu_addr = ring->wptr_gpu_addr;
3288         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3289         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3290
3291         mdelay(1);
3292         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3293
3294         rb_addr = ring->gpu_addr >> 8;
3295         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3296         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3297
3298         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3299         if (ring->use_doorbell) {
3300                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3301                                     DOORBELL_OFFSET, ring->doorbell_index);
3302                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3303                                     DOORBELL_EN, 1);
3304         } else {
3305                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3306         }
3307         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3308
3309         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3310                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3311         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3312
3313         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3314                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3315
3316
3317         /* start the ring */
3318         gfx_v9_0_cp_gfx_start(adev);
3319
3320         return 0;
3321 }
3322
3323 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3324 {
3325         if (enable) {
3326                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3327         } else {
3328                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3329                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3330                 adev->gfx.kiq[0].ring.sched.ready = false;
3331         }
3332         udelay(50);
3333 }
3334
3335 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3336 {
3337         const struct gfx_firmware_header_v1_0 *mec_hdr;
3338         const __le32 *fw_data;
3339         unsigned i;
3340         u32 tmp;
3341
3342         if (!adev->gfx.mec_fw)
3343                 return -EINVAL;
3344
3345         gfx_v9_0_cp_compute_enable(adev, false);
3346
3347         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3348         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3349
3350         fw_data = (const __le32 *)
3351                 (adev->gfx.mec_fw->data +
3352                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3353         tmp = 0;
3354         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3355         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3356         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3357
3358         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3359                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3360         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3361                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3362
3363         /* MEC1 */
3364         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3365                          mec_hdr->jt_offset);
3366         for (i = 0; i < mec_hdr->jt_size; i++)
3367                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3368                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3369
3370         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3371                         adev->gfx.mec_fw_version);
3372         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1 */
3373
3374         return 0;
3375 }
3376
3377 /* KIQ functions */
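/*
 * RLC_CP_SCHEDULERS encoding used below: the low byte identifies the
 * KIQ as (me << 5) | (pipe << 3) | queue; for example me=1, pipe=0,
 * queue=0 yields 0x20.  The second write additionally sets bit 7
 * (0x80), presumably to mark the programmed entry valid once the queue
 * bits are in place.
 */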
3378 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3379 {
3380         uint32_t tmp;
3381         struct amdgpu_device *adev = ring->adev;
3382
3383         /* tell RLC which queue is the KIQ */
3384         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3385         tmp &= 0xffffff00;
3386         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3387         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3388         tmp |= 0x80;
3389         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3390 }
3391
3392 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3393 {
3394         struct amdgpu_device *adev = ring->adev;
3395
3396         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3397                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3398                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3399                         mqd->cp_hqd_queue_priority =
3400                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3401                 }
3402         }
3403 }
3404
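/*
 * The MQD initialization below fills struct v9_mqd purely from ring
 * state: EOP base and size (with GFX9_MEC_HPD_SIZE of 4096 bytes,
 * EOP_SIZE works out to order_base_2(4096 / 4) - 1 = 9, i.e. 1024
 * dwords), doorbell control, the PQ base and size, rptr/wptr writeback
 * addresses and VMID 0.  Only the KIQ sets cp_hqd_active here, since
 * the map_queues packet activates the other queues.
 */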
3405 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3406 {
3407         struct amdgpu_device *adev = ring->adev;
3408         struct v9_mqd *mqd = ring->mqd_ptr;
3409         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3410         uint32_t tmp;
3411
3412         mqd->header = 0xC0310800;
3413         mqd->compute_pipelinestat_enable = 0x00000001;
3414         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3415         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3416         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3417         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3418         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3419         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3420         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3421         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3422         mqd->compute_misc_reserved = 0x00000003;
3423
3424         mqd->dynamic_cu_mask_addr_lo =
3425                 lower_32_bits(ring->mqd_gpu_addr
3426                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3427         mqd->dynamic_cu_mask_addr_hi =
3428                 upper_32_bits(ring->mqd_gpu_addr
3429                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3430
3431         eop_base_addr = ring->eop_gpu_addr >> 8;
3432         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3433         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3434
3435         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3436         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3437         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3438                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3439
3440         mqd->cp_hqd_eop_control = tmp;
3441
3442         /* enable doorbell? */
3443         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3444
3445         if (ring->use_doorbell) {
3446                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447                                     DOORBELL_OFFSET, ring->doorbell_index);
3448                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449                                     DOORBELL_EN, 1);
3450                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451                                     DOORBELL_SOURCE, 0);
3452                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453                                     DOORBELL_HIT, 0);
3454         } else {
3455                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456                                          DOORBELL_EN, 0);
3457         }
3458
3459         mqd->cp_hqd_pq_doorbell_control = tmp;
3460
3461         /* disable the queue if it's active */
3462         ring->wptr = 0;
3463         mqd->cp_hqd_dequeue_request = 0;
3464         mqd->cp_hqd_pq_rptr = 0;
3465         mqd->cp_hqd_pq_wptr_lo = 0;
3466         mqd->cp_hqd_pq_wptr_hi = 0;
3467
3468         /* set the pointer to the MQD */
3469         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3470         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3471
3472         /* set MQD vmid to 0 */
3473         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3474         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3475         mqd->cp_mqd_control = tmp;
3476
3477         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3478         hqd_gpu_addr = ring->gpu_addr >> 8;
3479         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3480         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3481
3482         /* set up the HQD, this is similar to CP_RB0_CNTL */
3483         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3484         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3485                             (order_base_2(ring->ring_size / 4) - 1));
3486         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3487                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3488 #ifdef __BIG_ENDIAN
3489         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3490 #endif
3491         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3492         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3493         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3494         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3495         mqd->cp_hqd_pq_control = tmp;
3496
3497         /* set the wb address whether it's enabled or not */
3498         wb_gpu_addr = ring->rptr_gpu_addr;
3499         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3500         mqd->cp_hqd_pq_rptr_report_addr_hi =
3501                 upper_32_bits(wb_gpu_addr) & 0xffff;
3502
3503         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3504         wb_gpu_addr = ring->wptr_gpu_addr;
3505         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3506         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3507
3508         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3509         ring->wptr = 0;
3510         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3511
3512         /* set the vmid for the queue */
3513         mqd->cp_hqd_vmid = 0;
3514
3515         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3516         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3517         mqd->cp_hqd_persistent_state = tmp;
3518
3519         /* set MIN_IB_AVAIL_SIZE */
3520         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3521         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3522         mqd->cp_hqd_ib_control = tmp;
3523
3524         /* set static priority for a queue/ring */
3525         gfx_v9_0_mqd_set_priority(ring, mqd);
3526         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3527
3528         /* the map_queues packet doesn't need to activate the queue,
3529          * so only the KIQ needs to set this field.
3530          */
3531         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3532                 mqd->cp_hqd_active = 1;
3533
3534         return 0;
3535 }
3536
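     /*
      * Program the KIQ's MQD contents into the CP_HQD_* registers directly
      * via MMIO (the KIQ cannot map itself with a map_queues packet):
      * disable wptr polling, quiesce any active HQD, write the MQD fields,
      * then activate the queue and its doorbell.
      */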
3537 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3538 {
3539         struct amdgpu_device *adev = ring->adev;
3540         struct v9_mqd *mqd = ring->mqd_ptr;
3541         int j;
3542
3543         /* disable wptr polling */
3544         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3545
3546         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3547                mqd->cp_hqd_eop_base_addr_lo);
3548         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3549                mqd->cp_hqd_eop_base_addr_hi);
3550
3551         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3552         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3553                mqd->cp_hqd_eop_control);
3554
3555         /* enable doorbell? */
3556         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3557                mqd->cp_hqd_pq_doorbell_control);
3558
3559         /* disable the queue if it's active */
3560         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3561                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3562                 for (j = 0; j < adev->usec_timeout; j++) {
3563                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3564                                 break;
3565                         udelay(1);
3566                 }
3567                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3568                        mqd->cp_hqd_dequeue_request);
3569                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3570                        mqd->cp_hqd_pq_rptr);
3571                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572                        mqd->cp_hqd_pq_wptr_lo);
3573                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574                        mqd->cp_hqd_pq_wptr_hi);
3575         }
3576
3577         /* set the pointer to the MQD */
3578         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3579                mqd->cp_mqd_base_addr_lo);
3580         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3581                mqd->cp_mqd_base_addr_hi);
3582
3583         /* set MQD vmid to 0 */
3584         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3585                mqd->cp_mqd_control);
3586
3587         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3588         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3589                mqd->cp_hqd_pq_base_lo);
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3591                mqd->cp_hqd_pq_base_hi);
3592
3593         /* set up the HQD, this is similar to CP_RB0_CNTL */
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3595                mqd->cp_hqd_pq_control);
3596
3597         /* set the wb address whether it's enabled or not */
3598         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3599                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3600         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3601                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3602
3603         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3604         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3605                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3606         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3607                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3608
3609         /* enable the doorbell if requested */
3610         if (ring->use_doorbell) {
3611                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3612                                         (adev->doorbell_index.kiq * 2) << 2);
3613                 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3614                  * doesn't wake GC up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3615                  * around this issue, and this change has to align with the firmware
3616                  * update.
3617                  */
3618                 if (check_if_enlarge_doorbell_range(adev))
3619                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3620                                         (adev->doorbell.size - 4));
3621                 else
3622                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3623                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3624         }
3625
3626         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3627                mqd->cp_hqd_pq_doorbell_control);
3628
3629         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3630         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3631                mqd->cp_hqd_pq_wptr_lo);
3632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3633                mqd->cp_hqd_pq_wptr_hi);
3634
3635         /* set the vmid for the queue */
3636         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3637
3638         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3639                mqd->cp_hqd_persistent_state);
3640
3641         /* activate the queue */
3642         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3643                mqd->cp_hqd_active);
3644
3645         if (ring->use_doorbell)
3646                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3647
3648         return 0;
3649 }
3650
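     /*
      * Tear down the KIQ's HQD: request a dequeue, wait for the queue to go
      * inactive (forcing CP_HQD_ACTIVE to 0 if the request times out), then
      * clear the remaining HQD registers.
      */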
3651 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3652 {
3653         struct amdgpu_device *adev = ring->adev;
3654         int j;
3655
3656         /* disable the queue if it's active */
3657         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3658
3659                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3660
3661                 for (j = 0; j < adev->usec_timeout; j++) {
3662                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3663                                 break;
3664                         udelay(1);
3665                 }
3666
3667                 if (j == adev->usec_timeout) {
3668                         DRM_DEBUG("KIQ dequeue request failed.\n");
3669
3670                         /* Manual disable if dequeue request times out */
3671                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3672                 }
3673
3674                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3675                       0);
3676         }
3677
3678         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3679         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3680         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3681         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3682         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3683         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3684         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3685         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3686
3687         return 0;
3688 }
3689
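     /*
      * Bring up the KIQ.  On a GPU reset the saved MQD backup is restored
      * and only the registers are reprogrammed; otherwise a fresh MQD is
      * generated, the registers are programmed and the result is saved as
      * the backup copy.
      */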
3690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3691 {
3692         struct amdgpu_device *adev = ring->adev;
3693         struct v9_mqd *mqd = ring->mqd_ptr;
3694         struct v9_mqd *tmp_mqd;
3695
3696         gfx_v9_0_kiq_setting(ring);
3697
3698         /* The GPU could be in a bad state during probe and the driver may
3699          * trigger a reset after loading the SMU; in that case the MQD has not
3700          * been initialized and the driver needs to re-init it.
3701          * Check mqd->cp_hqd_pq_control since this value should not be 0.
3702          */
3703         tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3704         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3705                 /* for the GPU reset case, restore the MQD to a clean state */
3706                 if (adev->gfx.kiq[0].mqd_backup)
3707                         memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3708
3709                 /* reset ring buffer */
3710                 ring->wptr = 0;
3711                 amdgpu_ring_clear_ring(ring);
3712
3713                 mutex_lock(&adev->srbm_mutex);
3714                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3715                 gfx_v9_0_kiq_init_register(ring);
3716                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3717                 mutex_unlock(&adev->srbm_mutex);
3718         } else {
3719                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3720                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3721                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3722                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3723                         amdgpu_ring_clear_ring(ring);
3724                 mutex_lock(&adev->srbm_mutex);
3725                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3726                 gfx_v9_0_mqd_init(ring);
3727                 gfx_v9_0_kiq_init_register(ring);
3728                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3729                 mutex_unlock(&adev->srbm_mutex);
3730
3731                 if (adev->gfx.kiq[0].mqd_backup)
3732                         memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3733         }
3734
3735         return 0;
3736 }
3737
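     /*
      * Initialize a compute queue's MQD.  Unlike the KIQ, no registers are
      * written here; the queue is mapped later through the KIQ via
      * amdgpu_gfx_enable_kcq().
      */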
3738 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3739 {
3740         struct amdgpu_device *adev = ring->adev;
3741         struct v9_mqd *mqd = ring->mqd_ptr;
3742         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3743         struct v9_mqd *tmp_mqd;
3744
3745         /* Same as the KIQ init above: the driver needs to re-init the MQD if
3746          * mqd->cp_hqd_pq_control has not been initialized before.
3747          */
3748         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3749
3750         if (!tmp_mqd->cp_hqd_pq_control ||
3751             (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3752                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3753                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3754                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3755                 mutex_lock(&adev->srbm_mutex);
3756                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3757                 gfx_v9_0_mqd_init(ring);
3758                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3759                 mutex_unlock(&adev->srbm_mutex);
3760
3761                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3762                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3763         } else {
3764                 /* restore the MQD to a clean state */
3765                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3766                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3767                 /* reset ring buffer */
3768                 ring->wptr = 0;
3769                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3770                 amdgpu_ring_clear_ring(ring);
3771         }
3772
3773         return 0;
3774 }
3775
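     /*
      * Reserve and kmap the KIQ MQD BO, then (re)initialize the KIQ through
      * gfx_v9_0_kiq_init_queue().
      */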
3776 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3777 {
3778         struct amdgpu_ring *ring;
3779         int r;
3780
3781         ring = &adev->gfx.kiq[0].ring;
3782
3783         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3784         if (unlikely(r != 0))
3785                 return r;
3786
3787         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3788         if (unlikely(r != 0)) {
3789                 amdgpu_bo_unreserve(ring->mqd_obj);
3790                 return r;
3791         }
3792
3793         gfx_v9_0_kiq_init_queue(ring);
3794         amdgpu_bo_kunmap(ring->mqd_obj);
3795         ring->mqd_ptr = NULL;
3796         amdgpu_bo_unreserve(ring->mqd_obj);
3797         return 0;
3798 }
3799
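     /*
      * Enable the compute MEC, initialize the MQD of every compute ring,
      * then ask the KIQ to map the compute queues via amdgpu_gfx_enable_kcq().
      */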
3800 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3801 {
3802         struct amdgpu_ring *ring = NULL;
3803         int r = 0, i;
3804
3805         gfx_v9_0_cp_compute_enable(adev, true);
3806
3807         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3808                 ring = &adev->gfx.compute_ring[i];
3809
3810                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3811                 if (unlikely(r != 0))
3812                         goto done;
3813                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3814                 if (!r) {
3815                         r = gfx_v9_0_kcq_init_queue(ring);
3816                         amdgpu_bo_kunmap(ring->mqd_obj);
3817                         ring->mqd_ptr = NULL;
3818                 }
3819                 amdgpu_bo_unreserve(ring->mqd_obj);
3820                 if (r)
3821                         goto done;
3822         }
3823
3824         r = amdgpu_gfx_enable_kcq(adev, 0);
3825 done:
3826         return r;
3827 }
3828
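     /*
      * Bring up the command processor: load the CP microcode when the PSP
      * is not doing the loading (legacy path), resume the KIQ, GFX and
      * compute queues, then ring-test each of them.
      */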
3829 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3830 {
3831         int r, i;
3832         struct amdgpu_ring *ring;
3833
3834         if (!(adev->flags & AMD_IS_APU))
3835                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3836
3837         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3838                 if (adev->gfx.num_gfx_rings) {
3839                         /* legacy firmware loading */
3840                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3841                         if (r)
3842                                 return r;
3843                 }
3844
3845                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3846                 if (r)
3847                         return r;
3848         }
3849
3850         r = gfx_v9_0_kiq_resume(adev);
3851         if (r)
3852                 return r;
3853
3854         if (adev->gfx.num_gfx_rings) {
3855                 r = gfx_v9_0_cp_gfx_resume(adev);
3856                 if (r)
3857                         return r;
3858         }
3859
3860         r = gfx_v9_0_kcq_resume(adev);
3861         if (r)
3862                 return r;
3863
3864         if (adev->gfx.num_gfx_rings) {
3865                 ring = &adev->gfx.gfx_ring[0];
3866                 r = amdgpu_ring_test_helper(ring);
3867                 if (r)
3868                         return r;
3869         }
3870
3871         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3872                 ring = &adev->gfx.compute_ring[i];
3873                 amdgpu_ring_test_helper(ring);
3874         }
3875
3876         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3877
3878         return 0;
3879 }
3880
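     /*
      * Mirror the data fabric hash settings into TCP_ADDR_CONFIG so the TCP
      * uses the same 64K/2M/1G address hashing.  Only applies to GC 9.4.1
      * and 9.4.2.
      */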
3881 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3882 {
3883         u32 tmp;
3884
3885         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3886             amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3887                 return;
3888
3889         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3890         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3891                                 adev->df.hash_status.hash_64k);
3892         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3893                                 adev->df.hash_status.hash_2m);
3894         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3895                                 adev->df.hash_status.hash_1g);
3896         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3897 }
3898
3899 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3900 {
3901         if (adev->gfx.num_gfx_rings)
3902                 gfx_v9_0_cp_gfx_enable(adev, enable);
3903         gfx_v9_0_cp_compute_enable(adev, enable);
3904 }
3905
3906 static int gfx_v9_0_hw_init(void *handle)
3907 {
3908         int r;
3909         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3910
3911         if (!amdgpu_sriov_vf(adev))
3912                 gfx_v9_0_init_golden_registers(adev);
3913
3914         gfx_v9_0_constants_init(adev);
3915
3916         gfx_v9_0_init_tcp_config(adev);
3917
3918         r = adev->gfx.rlc.funcs->resume(adev);
3919         if (r)
3920                 return r;
3921
3922         r = gfx_v9_0_cp_resume(adev);
3923         if (r)
3924                 return r;
3925
3926         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3927                 gfx_v9_4_2_set_power_brake_sequence(adev);
3928
3929         return r;
3930 }
3931
3932 static int gfx_v9_0_hw_fini(void *handle)
3933 {
3934         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3935
3936         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3937                 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3938         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3939         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3940
3941         /* DF freeze and kcq disable will fail if a RAS fatal error was triggered */
3942         if (!amdgpu_ras_intr_triggered())
3943                 /* disable the KCQ to avoid the CPC touching memory that is no longer valid */
3944                 amdgpu_gfx_disable_kcq(adev, 0);
3945
3946         if (amdgpu_sriov_vf(adev)) {
3947                 gfx_v9_0_cp_gfx_enable(adev, false);
3948                 /* polling must be disabled for SRIOV when the hw is finished,
3949                  * otherwise the CPC engine may keep fetching a WB address that is
3950                  * already invalid after the sw has finished, triggering a DMAR read
3951                  * error on the hypervisor side.
3952                  */
3953                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3954                 return 0;
3955         }
3956
3957         /* Use the deinitialize sequence from CAIL when unbinding the device
3958          * from the driver, otherwise the KIQ hangs when binding back.
3959          */
3960         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3961                 mutex_lock(&adev->srbm_mutex);
3962                 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3963                                 adev->gfx.kiq[0].ring.pipe,
3964                                 adev->gfx.kiq[0].ring.queue, 0, 0);
3965                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3966                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3967                 mutex_unlock(&adev->srbm_mutex);
3968         }
3969
3970         gfx_v9_0_cp_enable(adev, false);
3971
3972         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3973         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3974             (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3975                 dev_dbg(adev->dev, "Skipping RLC halt\n");
3976                 return 0;
3977         }
3978
3979         adev->gfx.rlc.funcs->stop(adev);
3980         return 0;
3981 }
3982
3983 static int gfx_v9_0_suspend(void *handle)
3984 {
3985         return gfx_v9_0_hw_fini(handle);
3986 }
3987
3988 static int gfx_v9_0_resume(void *handle)
3989 {
3990         return gfx_v9_0_hw_init(handle);
3991 }
3992
3993 static bool gfx_v9_0_is_idle(void *handle)
3994 {
3995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3996
3997         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3998                                 GRBM_STATUS, GUI_ACTIVE))
3999                 return false;
4000         else
4001                 return true;
4002 }
4003
4004 static int gfx_v9_0_wait_for_idle(void *handle)
4005 {
4006         unsigned i;
4007         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4008
4009         for (i = 0; i < adev->usec_timeout; i++) {
4010                 if (gfx_v9_0_is_idle(handle))
4011                         return 0;
4012                 udelay(1);
4013         }
4014         return -ETIMEDOUT;
4015 }
4016
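     /*
      * Perform a GRBM soft reset of the CP/GFX/RLC blocks when GRBM_STATUS
      * or GRBM_STATUS2 reports them busy: halt the RLC and CP first, pulse
      * the corresponding GRBM_SOFT_RESET bits, then let things settle.
      */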
4017 static int gfx_v9_0_soft_reset(void *handle)
4018 {
4019         u32 grbm_soft_reset = 0;
4020         u32 tmp;
4021         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4022
4023         /* GRBM_STATUS */
4024         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4025         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4026                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4027                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4028                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4029                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4030                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4031                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4032                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4033                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4034                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4035         }
4036
4037         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4038                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4039                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4040         }
4041
4042         /* GRBM_STATUS2 */
4043         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4044         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4045                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4046                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4047
4048
4049         if (grbm_soft_reset) {
4050                 /* stop the rlc */
4051                 adev->gfx.rlc.funcs->stop(adev);
4052
4053                 if (adev->gfx.num_gfx_rings)
4054                         /* Disable GFX parsing/prefetching */
4055                         gfx_v9_0_cp_gfx_enable(adev, false);
4056
4057                 /* Disable MEC parsing/prefetching */
4058                 gfx_v9_0_cp_compute_enable(adev, false);
4059
4060                 if (grbm_soft_reset) {
4061                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4062                         tmp |= grbm_soft_reset;
4063                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4064                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4065                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4066
4067                         udelay(50);
4068
4069                         tmp &= ~grbm_soft_reset;
4070                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4071                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4072                 }
4073
4074                 /* Wait a little for things to settle down */
4075                 udelay(50);
4076         }
4077         return 0;
4078 }
4079
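     /*
      * Read the GPU clock counter through the KIQ: emit a COPY_DATA packet
      * that deposits the 64-bit value into a writeback slot and poll the
      * fence.  Used where the RLC clock-capture registers cannot be accessed
      * directly (see gfx_v9_0_get_gpu_clock_counter()).
      */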
4080 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4081 {
4082         signed long r, cnt = 0;
4083         unsigned long flags;
4084         uint32_t seq, reg_val_offs = 0;
4085         uint64_t value = 0;
4086         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4087         struct amdgpu_ring *ring = &kiq->ring;
4088
4089         BUG_ON(!ring->funcs->emit_rreg);
4090
4091         spin_lock_irqsave(&kiq->ring_lock, flags);
4092         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4093                 pr_err("critical bug! too many kiq readers\n");
4094                 goto failed_unlock;
4095         }
4096         amdgpu_ring_alloc(ring, 32);
4097         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4098         amdgpu_ring_write(ring, 9 |     /* src: register*/
4099                                 (5 << 8) |      /* dst: memory */
4100                                 (1 << 16) |     /* count sel */
4101                                 (1 << 20));     /* write confirm */
4102         amdgpu_ring_write(ring, 0);
4103         amdgpu_ring_write(ring, 0);
4104         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4105                                 reg_val_offs * 4));
4106         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4107                                 reg_val_offs * 4));
4108         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4109         if (r)
4110                 goto failed_undo;
4111
4112         amdgpu_ring_commit(ring);
4113         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4114
4115         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4116
4117         /* don't keep waiting in the gpu reset case because doing so may
4118          * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4119          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4120          * never return if we keep waiting in virt_kiq_rreg, which causes
4121          * gpu_recover() to hang there.
4122          *
4123          * also don't keep waiting in IRQ context
4124          */
4125         if (r < 1 && (amdgpu_in_reset(adev)))
4126                 goto failed_kiq_read;
4127
4128         might_sleep();
4129         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4130                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4131                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4132         }
4133
4134         if (cnt > MAX_KIQ_REG_TRY)
4135                 goto failed_kiq_read;
4136
4137         mb();
4138         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4139                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4140         amdgpu_device_wb_free(adev, reg_val_offs);
4141         return value;
4142
4143 failed_undo:
4144         amdgpu_ring_undo(ring);
4145 failed_unlock:
4146         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4147 failed_kiq_read:
4148         if (reg_val_offs)
4149                 amdgpu_device_wb_free(adev, reg_val_offs);
4150         pr_err("failed to read gpu clock\n");
4151         return ~0;
4152 }
4153
4154 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4155 {
4156         uint64_t clock, clock_lo, clock_hi, hi_check;
4157
4158         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4159         case IP_VERSION(9, 3, 0):
4160                 preempt_disable();
4161                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4162                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4163                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4164                 /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
4165                  * carries over roughly every 42 seconds.
4166                  */
4167                 if (hi_check != clock_hi) {
4168                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4169                         clock_hi = hi_check;
4170                 }
4171                 preempt_enable();
4172                 clock = clock_lo | (clock_hi << 32ULL);
4173                 break;
4174         default:
4175                 amdgpu_gfx_off_ctrl(adev, false);
4176                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4177                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4178                             IP_VERSION(9, 0, 1) &&
4179                     amdgpu_sriov_runtime(adev)) {
4180                         clock = gfx_v9_0_kiq_read_clock(adev);
4181                 } else {
4182                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4183                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4184                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4185                 }
4186                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4187                 amdgpu_gfx_off_ctrl(adev, true);
4188                 break;
4189         }
4190         return clock;
4191 }
4192
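     /*
      * Emit WRITE_DATA packets that program the per-VMID GDS base/size,
      * GWS and OA allocations.  The OA value is a bitmask of oa_size
      * consecutive bits starting at bit oa_base.
      */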
4193 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4194                                           uint32_t vmid,
4195                                           uint32_t gds_base, uint32_t gds_size,
4196                                           uint32_t gws_base, uint32_t gws_size,
4197                                           uint32_t oa_base, uint32_t oa_size)
4198 {
4199         struct amdgpu_device *adev = ring->adev;
4200
4201         /* GDS Base */
4202         gfx_v9_0_write_data_to_reg(ring, 0, false,
4203                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4204                                    gds_base);
4205
4206         /* GDS Size */
4207         gfx_v9_0_write_data_to_reg(ring, 0, false,
4208                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4209                                    gds_size);
4210
4211         /* GWS */
4212         gfx_v9_0_write_data_to_reg(ring, 0, false,
4213                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4214                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4215
4216         /* OA */
4217         gfx_v9_0_write_data_to_reg(ring, 0, false,
4218                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4219                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4220 }
4221
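     /*
      * Pre-assembled GFX9 compute shader binaries (raw machine code, not
      * meant to be edited by hand) used by the EDC GPR workarounds below to
      * write every VGPR/SGPR so the register files start from a known state.
      */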
4222 static const u32 vgpr_init_compute_shader[] =
4223 {
4224         0xb07c0000, 0xbe8000ff,
4225         0x000000f8, 0xbf110800,
4226         0x7e000280, 0x7e020280,
4227         0x7e040280, 0x7e060280,
4228         0x7e080280, 0x7e0a0280,
4229         0x7e0c0280, 0x7e0e0280,
4230         0x80808800, 0xbe803200,
4231         0xbf84fff5, 0xbf9c0000,
4232         0xd28c0001, 0x0001007f,
4233         0xd28d0001, 0x0002027e,
4234         0x10020288, 0xb8810904,
4235         0xb7814000, 0xd1196a01,
4236         0x00000301, 0xbe800087,
4237         0xbefc00c1, 0xd89c4000,
4238         0x00020201, 0xd89cc080,
4239         0x00040401, 0x320202ff,
4240         0x00000800, 0x80808100,
4241         0xbf84fff8, 0x7e020280,
4242         0xbf810000, 0x00000000,
4243 };
4244
4245 static const u32 sgpr_init_compute_shader[] =
4246 {
4247         0xb07c0000, 0xbe8000ff,
4248         0x0000005f, 0xbee50080,
4249         0xbe812c65, 0xbe822c65,
4250         0xbe832c65, 0xbe842c65,
4251         0xbe852c65, 0xb77c0005,
4252         0x80808500, 0xbf84fff8,
4253         0xbe800080, 0xbf810000,
4254 };
4255
4256 static const u32 vgpr_init_compute_shader_arcturus[] = {
4257         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4258         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4259         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4260         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4261         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4262         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4263         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4264         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4265         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4266         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4267         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4268         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4269         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4270         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4271         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4272         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4273         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4274         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4275         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4276         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4277         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4278         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4279         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4280         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4281         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4282         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4283         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4284         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4285         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4286         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4287         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4288         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4289         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4290         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4291         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4292         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4293         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4294         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4295         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4296         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4297         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4298         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4299         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4300         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4301         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4302         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4303         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4304         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4305         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4306         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4307         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4308         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4309         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4310         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4311         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4312         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4313         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4314         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4315         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4316         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4317         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4318         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4319         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4320         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4321         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4322         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4323         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4324         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4325         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4326         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4327         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4328         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4329         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4330         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4331         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4332         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4333         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4334         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4335         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4336         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4337         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4338         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4339         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4340         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4341         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4342         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4343         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4344         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4345         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4346         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4347         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4348         0xbf84fff8, 0xbf810000,
4349 };
4350
4351 /* When the register arrays below are changed, please update gpr_reg_size
4352  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4353  * to cover all gfx9 ASICs */
4354 static const struct soc15_reg_entry vgpr_init_regs[] = {
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4364    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4365    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4366    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4367    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4368    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4369 };
4370
4371 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4372    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4373    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4374    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4375    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4376    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4377    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4378    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4379    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4380    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4381    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4382    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4383    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4384    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4385    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4386 };
4387
4388 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4389    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4390    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4391    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4392    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4393    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4394    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4395    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4396    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4397    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4398    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4399    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4400    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4401    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4402    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4403 };
4404
4405 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4406    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4407    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4408    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4409    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4410    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4411    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4412    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4413    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4414    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4415    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4416    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4420 };
4421
4422 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4423    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4424    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4425    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4426    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4427    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4428    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4429    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4430    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4431    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4432    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4433    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4434    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4435    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4436    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4437    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4438    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4439    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4440    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4441    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4442    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4443    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4444    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4445    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4446    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4447    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4448    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4449    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4450    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4451    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4452    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4453    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4454    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4455    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4456 };
4457
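     /*
      * GDS EDC workaround: DMA a clear pattern across the whole GDS block
      * on the first compute ring so the GDS contents are initialized, then
      * clear GDS_VMID0_SIZE again.  Only runs when GFX RAS is supported.
      */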
4458 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4459 {
4460         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4461         int i, r;
4462
4463         /* only supported when RAS is enabled */
4464         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4465                 return 0;
4466
4467         r = amdgpu_ring_alloc(ring, 7);
4468         if (r) {
4469                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4470                         ring->name, r);
4471                 return r;
4472         }
4473
4474         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4475         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4476
4477         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4478         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4479                                 PACKET3_DMA_DATA_DST_SEL(1) |
4480                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4481                                 PACKET3_DMA_DATA_ENGINE(0)));
4482         amdgpu_ring_write(ring, 0);
4483         amdgpu_ring_write(ring, 0);
4484         amdgpu_ring_write(ring, 0);
4485         amdgpu_ring_write(ring, 0);
4486         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4487                                 adev->gds.gds_size);
4488
4489         amdgpu_ring_commit(ring);
4490
4491         for (i = 0; i < adev->usec_timeout; i++) {
4492                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4493                         break;
4494                 udelay(1);
4495         }
4496
4497         if (i >= adev->usec_timeout)
4498                 r = -ETIMEDOUT;
4499
4500         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4501
4502         return r;
4503 }
4504
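     /*
      * GPR EDC workaround: build an IB that dispatches the VGPR and SGPR
      * init shaders above across all shader engines so every register file
      * is written once.  Only runs when GFX RAS is supported and the first
      * compute ring is ready.
      */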
4505 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4506 {
4507         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4508         struct amdgpu_ib ib;
4509         struct dma_fence *f = NULL;
4510         int r, i;
4511         unsigned total_size, vgpr_offset, sgpr_offset;
4512         u64 gpu_addr;
4513
4514         int compute_dim_x = adev->gfx.config.max_shader_engines *
4515                                                 adev->gfx.config.max_cu_per_sh *
4516                                                 adev->gfx.config.max_sh_per_se;
4517         int sgpr_work_group_size = 5;
4518         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4519         int vgpr_init_shader_size;
4520         const u32 *vgpr_init_shader_ptr;
4521         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4522
4523         /* only support when RAS is enabled */
4524         /* only supported when RAS is enabled */
4525                 return 0;
4526
4527         /* bail if the compute ring is not ready */
4528         if (!ring->sched.ready)
4529                 return 0;
4530
4531         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4532                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4533                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4534                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4535         } else {
4536                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4537                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4538                 vgpr_init_regs_ptr = vgpr_init_regs;
4539         }
4540
4541         total_size =
4542                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4543         total_size +=
4544                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4545         total_size +=
4546                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4547         total_size = ALIGN(total_size, 256);
4548         vgpr_offset = total_size;
4549         total_size += ALIGN(vgpr_init_shader_size, 256);
4550         sgpr_offset = total_size;
4551         total_size += sizeof(sgpr_init_compute_shader);
4552
4553         /* allocate an indirect buffer to put the commands in */
4554         memset(&ib, 0, sizeof(ib));
4555         r = amdgpu_ib_get(adev, NULL, total_size,
4556                                         AMDGPU_IB_POOL_DIRECT, &ib);
4557         if (r) {
4558                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4559                 return r;
4560         }
4561
4562         /* load the compute shaders */
4563         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4564                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4565
4566         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4567                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4568
4569         /* init the ib length to 0 */
4570         ib.length_dw = 0;
4571
4572         /* VGPR */
4573         /* write the register state for the compute dispatch */
4574         for (i = 0; i < gpr_reg_size; i++) {
4575                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4576                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4577                                                                 - PACKET3_SET_SH_REG_START;
4578                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4579         }
4580         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4581         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4582         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4583         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4584                                                         - PACKET3_SET_SH_REG_START;
4585         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4586         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4587
4588         /* write dispatch packet */
4589         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4590         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4591         ib.ptr[ib.length_dw++] = 1; /* y */
4592         ib.ptr[ib.length_dw++] = 1; /* z */
4593         ib.ptr[ib.length_dw++] =
4594                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4595
4596         /* write CS partial flush packet */
4597         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4598         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4599
4600         /* SGPR1 */
4601         /* write the register state for the compute dispatch */
4602         for (i = 0; i < gpr_reg_size; i++) {
4603                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4604                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4605                                                                 - PACKET3_SET_SH_REG_START;
4606                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4607         }
4608         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4609         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4610         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4611         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4612                                                         - PACKET3_SET_SH_REG_START;
4613         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4614         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4615
4616         /* write dispatch packet */
4617         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4618         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4619         ib.ptr[ib.length_dw++] = 1; /* y */
4620         ib.ptr[ib.length_dw++] = 1; /* z */
4621         ib.ptr[ib.length_dw++] =
4622                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4623
4624         /* write CS partial flush packet */
4625         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4626         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4627
4628         /* SGPR2 */
4629         /* write the register state for the compute dispatch */
4630         for (i = 0; i < gpr_reg_size; i++) {
4631                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4632                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4633                                                                 - PACKET3_SET_SH_REG_START;
4634                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4635         }
4636         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4637         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4638         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4639         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4640                                                         - PACKET3_SET_SH_REG_START;
4641         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4642         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4643
4644         /* write dispatch packet */
4645         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4646         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4647         ib.ptr[ib.length_dw++] = 1; /* y */
4648         ib.ptr[ib.length_dw++] = 1; /* z */
4649         ib.ptr[ib.length_dw++] =
4650                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4651
4652         /* write CS partial flush packet */
4653         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4654         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4655
4656         /* schedule the IB on the ring */
4657         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4658         if (r) {
4659                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4660                 goto fail;
4661         }
4662
4663         /* wait for the GPU to finish processing the IB */
4664         r = dma_fence_wait(f, false);
4665         if (r) {
4666                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4667                 goto fail;
4668         }
4669
4670 fail:
4671         amdgpu_ib_free(adev, &ib, NULL);
4672         dma_fence_put(f);
4673
4674         return r;
4675 }
4676
4677 static int gfx_v9_0_early_init(void *handle)
4678 {
4679         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4680
4681         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4682
4683         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4684             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4685                 adev->gfx.num_gfx_rings = 0;
4686         else
4687                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4688         adev->gfx.xcc_mask = 1;
4689         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4690                                           AMDGPU_MAX_COMPUTE_RINGS);
4691         gfx_v9_0_set_kiq_pm4_funcs(adev);
4692         gfx_v9_0_set_ring_funcs(adev);
4693         gfx_v9_0_set_irq_funcs(adev);
4694         gfx_v9_0_set_gds_init(adev);
4695         gfx_v9_0_set_rlc_funcs(adev);
4696
4697         /* init rlcg reg access ctrl */
4698         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4699
4700         return gfx_v9_0_init_microcode(adev);
4701 }
4702
4703 static int gfx_v9_0_ecc_late_init(void *handle)
4704 {
4705         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4706         int r;
4707
4708         /*
4709          * Temporary workaround for an issue where CP firmware fails to
4710          * update the read pointer while CP DMA writes the clearing
4711          * operation to GDS during the suspend/resume sequence on several
4712          * cards, so just limit this operation to the cold boot sequence.
4713          */
4714         if ((!adev->in_suspend) &&
4715             (adev->gds.gds_size)) {
4716                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4717                 if (r)
4718                         return r;
4719         }
4720
4721         /* requires IBs so do in late init after IB pool is initialized */
4722         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4723                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4724         else
4725                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4726
4727         if (r)
4728                 return r;
4729
4730         if (adev->gfx.ras &&
4731             adev->gfx.ras->enable_watchdog_timer)
4732                 adev->gfx.ras->enable_watchdog_timer(adev);
4733
4734         return 0;
4735 }
4736
4737 static int gfx_v9_0_late_init(void *handle)
4738 {
4739         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4740         int r;
4741
4742         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4743         if (r)
4744                 return r;
4745
4746         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4747         if (r)
4748                 return r;
4749
4750         r = gfx_v9_0_ecc_late_init(handle);
4751         if (r)
4752                 return r;
4753
4754         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4755                 gfx_v9_4_2_debug_trap_config_init(adev,
4756                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4757         else
4758                 gfx_v9_0_debug_trap_config_init(adev,
4759                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4760
4761         return 0;
4762 }
4763
4764 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4765 {
4766         uint32_t rlc_setting;
4767
4768         /* if RLC is not enabled, do nothing */
4769         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4770         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4771                 return false;
4772
4773         return true;
4774 }
4775
4776 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4777 {
4778         uint32_t data;
4779         unsigned i;
4780
4781         data = RLC_SAFE_MODE__CMD_MASK;
4782         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4783         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4784
4785         /* wait for RLC_SAFE_MODE */
4786         for (i = 0; i < adev->usec_timeout; i++) {
4787                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4788                         break;
4789                 udelay(1);
4790         }
4791 }
4792
4793 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4794 {
4795         uint32_t data;
4796
4797         data = RLC_SAFE_MODE__CMD_MASK;
4798         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4799 }
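
/*
 * A minimal equivalent sketch of the safe-mode request built above, using the
 * generic REG_SET_FIELD() helper instead of the raw mask/shift macros
 * (illustrative only; assumes CMD occupies bit 0 so the result matches
 * RLC_SAFE_MODE__CMD_MASK):
 */
#if 0
        data = REG_SET_FIELD(0, RLC_SAFE_MODE, CMD, 1);        /* issue request */
        data = REG_SET_FIELD(data, RLC_SAFE_MODE, MESSAGE, 1); /* 1 = enter, 0 = exit */
        WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
#endif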
4800
4801 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4802                                                 bool enable)
4803 {
4804         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4805
4806         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4807                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4808                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4809                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4810         } else {
4811                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4812                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4813                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4814         }
4815
4816         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4817 }
4818
4819 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4820                                                 bool enable)
4821 {
4822         /* TODO: double check if we need to perform under safe mode */
4823         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4824
4825         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4826                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4827         else
4828                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4829
4830         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4831                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4832         else
4833                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4834
4835         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4836 }
4837
4838 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4839                                                       bool enable)
4840 {
4841         uint32_t data, def;
4842
4843         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4844
4845         /* It is disabled by HW by default */
4846         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4847                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4848                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4849
4850                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4851                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4852
4853                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4854                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4855                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4856
4857                 /* only for Vega10 & Raven1 */
4858                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4859
4860                 if (def != data)
4861                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4862
4863                 /* MGLS is a global flag to control all MGLS in GFX */
4864                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4865                         /* 2 - RLC memory Light sleep */
4866                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4867                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4868                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4869                                 if (def != data)
4870                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4871                         }
4872                         /* 3 - CP memory Light sleep */
4873                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4874                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4875                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4876                                 if (def != data)
4877                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4878                         }
4879                 }
4880         } else {
4881                 /* 1 - MGCG_OVERRIDE */
4882                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4883
4884                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4885                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4886
4887                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4888                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4889                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4890                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4891
4892                 if (def != data)
4893                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4894
4895                 /* 2 - disable MGLS in RLC */
4896                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4897                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4898                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4899                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4900                 }
4901
4902                 /* 3 - disable MGLS in CP */
4903                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4904                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4905                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4906                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4907                 }
4908         }
4909
4910         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4911 }
4912
4913 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4914                                            bool enable)
4915 {
4916         uint32_t data, def;
4917
4918         if (!adev->gfx.num_gfx_rings)
4919                 return;
4920
4921         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4922
4923         /* Enable 3D CGCG/CGLS */
4924         if (enable) {
4925                 /* write cmd to clear cgcg/cgls ov */
4926                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4927                 /* unset CGCG override */
4928                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4929                 /* update CGCG and CGLS override bits */
4930                 if (def != data)
4931                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4932
4933                 /* enable 3D CGCG FSM(0x0000363f) */
4934                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4935
4936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4937                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4938                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4939                 else
4940                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4941
4942                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4943                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4944                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4945                 if (def != data)
4946                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4947
4948                 /* set IDLE_POLL_COUNT(0x00900100) */
4949                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4950                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4951                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4952                 if (def != data)
4953                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4954         } else {
4955                 /* Disable CGCG/CGLS */
4956                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4957                 /* disable cgcg, cgls should be disabled */
4958                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4959                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4960                 /* disable cgcg and cgls in FSM */
4961                 if (def != data)
4962                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4963         }
4964
4965         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4966 }
4967
4968 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4969                                                       bool enable)
4970 {
4971         uint32_t def, data;
4972
4973         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4974
4975         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4976                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4977                 /* unset CGCG override */
4978                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4979                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4980                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4981                 else
4982                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4983                 /* update CGCG and CGLS override bits */
4984                 if (def != data)
4985                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4986
4987                 /* enable cgcg FSM(0x0000363F) */
4988                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4989
4990                 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4991                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4992                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4993                 else
4994                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4995                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4996                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4997                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4998                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4999                 if (def != data)
5000                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5001
5002                 /* set IDLE_POLL_COUNT(0x00900100) */
5003                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5004                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5005                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5006                 if (def != data)
5007                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5008         } else {
5009                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5010                 /* reset CGCG/CGLS bits */
5011                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5012                 /* disable cgcg and cgls in FSM */
5013                 if (def != data)
5014                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5015         }
5016
5017         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5018 }
5019
5020 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5021                                             bool enable)
5022 {
5023         if (enable) {
5024                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5025                  * ===  MGCG + MGLS ===
5026                  */
5027                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5028                 /* ===  CGCG /CGLS for GFX 3D Only === */
5029                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5030                 /* ===  CGCG + CGLS === */
5031                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5032         } else {
5033                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5034                  * ===  CGCG + CGLS ===
5035                  */
5036                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5037                 /* ===  CGCG /CGLS for GFX 3D Only === */
5038                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5039                 /* ===  MGCG + MGLS === */
5040                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5041         }
5042         return 0;
5043 }
5044
5045 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5046                                               unsigned int vmid)
5047 {
5048         u32 reg, data;
5049
5050         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5051         if (amdgpu_sriov_is_pp_one_vf(adev))
5052                 data = RREG32_NO_KIQ(reg);
5053         else
5054                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5055
5056         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5057         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5058
5059         if (amdgpu_sriov_is_pp_one_vf(adev))
5060                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5061         else
5062                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5063 }
5064
5065 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5066 {
5067         amdgpu_gfx_off_ctrl(adev, false);
5068
5069         gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5070
5071         amdgpu_gfx_off_ctrl(adev, true);
5072 }
5073
5074 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5075                                         uint32_t offset,
5076                                         struct soc15_reg_rlcg *entries, int arr_size)
5077 {
5078         int i;
5079         uint32_t reg;
5080
5081         if (!entries)
5082                 return false;
5083
5084         for (i = 0; i < arr_size; i++) {
5085                 const struct soc15_reg_rlcg *entry;
5086
5087                 entry = &entries[i];
5088                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5089                 if (offset == reg)
5090                         return true;
5091         }
5092
5093         return false;
5094 }
5095
5096 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5097 {
5098         return gfx_v9_0_check_rlcg_range(adev, offset,
5099                                         (void *)rlcg_access_gc_9_0,
5100                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5101 }
5102
5103 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5104         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5105         .set_safe_mode = gfx_v9_0_set_safe_mode,
5106         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5107         .init = gfx_v9_0_rlc_init,
5108         .get_csb_size = gfx_v9_0_get_csb_size,
5109         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5110         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5111         .resume = gfx_v9_0_rlc_resume,
5112         .stop = gfx_v9_0_rlc_stop,
5113         .reset = gfx_v9_0_rlc_reset,
5114         .start = gfx_v9_0_rlc_start,
5115         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5116         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5117 };
5118
5119 static int gfx_v9_0_set_powergating_state(void *handle,
5120                                           enum amd_powergating_state state)
5121 {
5122         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5123         bool enable = (state == AMD_PG_STATE_GATE);
5124
5125         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5126         case IP_VERSION(9, 2, 2):
5127         case IP_VERSION(9, 1, 0):
5128         case IP_VERSION(9, 3, 0):
5129                 if (!enable)
5130                         amdgpu_gfx_off_ctrl(adev, false);
5131
5132                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5133                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5134                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5135                 } else {
5136                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5137                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5138                 }
5139
5140                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5141                         gfx_v9_0_enable_cp_power_gating(adev, true);
5142                 else
5143                         gfx_v9_0_enable_cp_power_gating(adev, false);
5144
5145                 /* update gfx cgpg state */
5146                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5147
5148                 /* update mgcg state */
5149                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5150
5151                 if (enable)
5152                         amdgpu_gfx_off_ctrl(adev, true);
5153                 break;
5154         case IP_VERSION(9, 2, 1):
5155                 amdgpu_gfx_off_ctrl(adev, enable);
5156                 break;
5157         default:
5158                 break;
5159         }
5160
5161         return 0;
5162 }
5163
5164 static int gfx_v9_0_set_clockgating_state(void *handle,
5165                                           enum amd_clockgating_state state)
5166 {
5167         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5168
5169         if (amdgpu_sriov_vf(adev))
5170                 return 0;
5171
5172         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5173         case IP_VERSION(9, 0, 1):
5174         case IP_VERSION(9, 2, 1):
5175         case IP_VERSION(9, 4, 0):
5176         case IP_VERSION(9, 2, 2):
5177         case IP_VERSION(9, 1, 0):
5178         case IP_VERSION(9, 4, 1):
5179         case IP_VERSION(9, 3, 0):
5180         case IP_VERSION(9, 4, 2):
5181                 gfx_v9_0_update_gfx_clock_gating(adev,
5182                                                  state == AMD_CG_STATE_GATE);
5183                 break;
5184         default:
5185                 break;
5186         }
5187         return 0;
5188 }
5189
5190 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5191 {
5192         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5193         int data;
5194
5195         if (amdgpu_sriov_vf(adev))
5196                 *flags = 0;
5197
5198         /* AMD_CG_SUPPORT_GFX_MGCG */
5199         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5200         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5201                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5202
5203         /* AMD_CG_SUPPORT_GFX_CGCG */
5204         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5205         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5206                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5207
5208         /* AMD_CG_SUPPORT_GFX_CGLS */
5209         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5210                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5211
5212         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5213         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5214         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5215                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5216
5217         /* AMD_CG_SUPPORT_GFX_CP_LS */
5218         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5219         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5220                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5221
5222         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5223                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5224                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5225                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5226                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5227
5228                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5229                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5230                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5231         }
5232 }
5233
5234 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5235 {
5236         return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5237 }
5238
5239 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5240 {
5241         struct amdgpu_device *adev = ring->adev;
5242         u64 wptr;
5243
5244         /* XXX check if swapping is necessary on BE */
5245         if (ring->use_doorbell) {
5246                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5247         } else {
5248                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5249                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5250         }
5251
5252         return wptr;
5253 }
5254
5255 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5256 {
5257         struct amdgpu_device *adev = ring->adev;
5258
5259         if (ring->use_doorbell) {
5260                 /* XXX check if swapping is necessary on BE */
5261                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5262                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5263         } else {
5264                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5265                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5266         }
5267 }
5268
5269 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5270 {
5271         struct amdgpu_device *adev = ring->adev;
5272         u32 ref_and_mask, reg_mem_engine;
5273         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5274
5275         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5276                 switch (ring->me) {
5277                 case 1:
5278                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5279                         break;
5280                 case 2:
5281                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5282                         break;
5283                 default:
5284                         return;
5285                 }
5286                 reg_mem_engine = 0;
5287         } else {
5288                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5289                 reg_mem_engine = 1; /* pfp */
5290         }
5291
5292         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5293                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5294                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5295                               ref_and_mask, ref_and_mask, 0x20);
5296 }
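
/*
 * Worked example of the ref_and_mask selection above (derived from this
 * function, illustrative only): a compute ring on MEC1 (ring->me == 1),
 * pipe 2 waits on bit (ref_and_mask_cp2 << 2) of the NBIO hdp_flush req/done
 * registers, i.e. each of the four MEC1 pipes owns its own flush bit, while a
 * GFX ring always uses ref_and_mask_cp0 and polls through the PFP
 * (reg_mem_engine = 1).
 */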
5297
5298 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5299                                         struct amdgpu_job *job,
5300                                         struct amdgpu_ib *ib,
5301                                         uint32_t flags)
5302 {
5303         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5304         u32 header, control = 0;
5305
5306         if (ib->flags & AMDGPU_IB_FLAG_CE)
5307                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5308         else
5309                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5310
5311         control |= ib->length_dw | (vmid << 24);
5312
5313         if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5314                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5315
5316                 if (flags & AMDGPU_IB_PREEMPTED)
5317                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5318
5319                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5320                         gfx_v9_0_ring_emit_de_meta(ring,
5321                                                    (!amdgpu_sriov_vf(ring->adev) &&
5322                                                    flags & AMDGPU_IB_PREEMPTED) ?
5323                                                    true : false,
5324                                                    job->gds_size > 0 && job->gds_base != 0);
5325         }
5326
5327         amdgpu_ring_write(ring, header);
5328         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5329         amdgpu_ring_write(ring,
5330 #ifdef __BIG_ENDIAN
5331                 (2 << 0) |
5332 #endif
5333                 lower_32_bits(ib->gpu_addr));
5334         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5335         amdgpu_ring_ib_on_emit_cntl(ring);
5336         amdgpu_ring_write(ring, control);
5337 }
5338
5339 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5340                                      unsigned offset)
5341 {
5342         u32 control = ring->ring[offset];
5343
5344         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5345         ring->ring[offset] = control;
5346 }
5347
5348 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5349                                         unsigned offset)
5350 {
5351         struct amdgpu_device *adev = ring->adev;
5352         void *ce_payload_cpu_addr;
5353         uint64_t payload_offset, payload_size;
5354
5355         payload_size = sizeof(struct v9_ce_ib_state);
5356
5357         if (ring->is_mes_queue) {
5358                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5359                                           gfx[0].gfx_meta_data) +
5360                         offsetof(struct v9_gfx_meta_data, ce_payload);
5361                 ce_payload_cpu_addr =
5362                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5363         } else {
5364                 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5365                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5366         }
5367
5368         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5369                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5370         } else {
5371                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5372                        (ring->buf_mask + 1 - offset) << 2);
5373                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5374                 memcpy((void *)&ring->ring[0],
5375                        ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5376                        payload_size);
5377         }
5378 }
5379
5380 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5381                                         unsigned offset)
5382 {
5383         struct amdgpu_device *adev = ring->adev;
5384         void *de_payload_cpu_addr;
5385         uint64_t payload_offset, payload_size;
5386
5387         payload_size = sizeof(struct v9_de_ib_state);
5388
5389         if (ring->is_mes_queue) {
5390                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5391                                           gfx[0].gfx_meta_data) +
5392                         offsetof(struct v9_gfx_meta_data, de_payload);
5393                 de_payload_cpu_addr =
5394                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5395         } else {
5396                 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5397                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5398         }
5399
5400         ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5401                 IB_COMPLETION_STATUS_PREEMPTED;
5402
5403         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5404                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5405         } else {
5406                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5407                        (ring->buf_mask + 1 - offset) << 2);
5408                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5409                 memcpy((void *)&ring->ring[0],
5410                        de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5411                        payload_size);
5412         }
5413 }
5414
5415 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5416                                           struct amdgpu_job *job,
5417                                           struct amdgpu_ib *ib,
5418                                           uint32_t flags)
5419 {
5420         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5421         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5422
5423         /* Currently, there is a high likelihood of a wave ID mismatch
5424          * between ME and GDS, leading to a HW deadlock, because ME generates
5425          * different wave IDs than the GDS expects. This situation happens
5426          * randomly when at least 5 compute pipes use GDS ordered append.
5427          * The wave IDs generated by ME are also wrong after suspend/resume.
5428          * Those are probably bugs somewhere else in the kernel driver.
5429          *
5430          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5431          * GDS to 0 for this ring (me/pipe).
5432          */
5433         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5434                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5435                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5436                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5437         }
5438
5439         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5440         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5441         amdgpu_ring_write(ring,
5442 #ifdef __BIG_ENDIAN
5443                                 (2 << 0) |
5444 #endif
5445                                 lower_32_bits(ib->gpu_addr));
5446         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5447         amdgpu_ring_write(ring, control);
5448 }
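
/*
 * Userspace opts in to the wave ID reset described above by setting
 * AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID on the IB chunk it submits. A minimal
 * sketch, assuming the UAPI definitions in include/uapi/drm/amdgpu_drm.h
 * (ib_gpu_va and ib_size_bytes are hypothetical placeholders):
 */
#if 0
        struct drm_amdgpu_cs_chunk_ib ib_info = {
                .flags = AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID,
                .va_start = ib_gpu_va,          /* GPU VA of the IB using GDS OA */
                .ib_bytes = ib_size_bytes,
                .ip_type = AMDGPU_HW_IP_COMPUTE,
        };
#endif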
5449
5450 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5451                                      u64 seq, unsigned flags)
5452 {
5453         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5454         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5455         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5456         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5457         uint32_t dw2 = 0;
5458
5459         /* RELEASE_MEM - flush caches, send int */
5460         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5461
5462         if (writeback) {
5463                 dw2 = EOP_TC_NC_ACTION_EN;
5464         } else {
5465                 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5466                                 EOP_TC_MD_ACTION_EN;
5467         }
5468         dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5469                                 EVENT_INDEX(5);
5470         if (exec)
5471                 dw2 |= EOP_EXEC;
5472
5473         amdgpu_ring_write(ring, dw2);
5474         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5475
5476         /*
5477          * the address should be Qword aligned for a 64bit write, and Dword
5478          * aligned if we only send the 32bit data low (discarding the data high)
5479          */
5480         if (write64bit)
5481                 BUG_ON(addr & 0x7);
5482         else
5483                 BUG_ON(addr & 0x3);
5484         amdgpu_ring_write(ring, lower_32_bits(addr));
5485         amdgpu_ring_write(ring, upper_32_bits(addr));
5486         amdgpu_ring_write(ring, lower_32_bits(seq));
5487         amdgpu_ring_write(ring, upper_32_bits(seq));
5488         amdgpu_ring_write(ring, 0);
5489 }
5490
5491 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5492 {
5493         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5494         uint32_t seq = ring->fence_drv.sync_seq;
5495         uint64_t addr = ring->fence_drv.gpu_addr;
5496
5497         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5498                               lower_32_bits(addr), upper_32_bits(addr),
5499                               seq, 0xffffffff, 4);
5500 }
5501
5502 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5503                                         unsigned vmid, uint64_t pd_addr)
5504 {
5505         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5506
5507         /* compute doesn't have PFP */
5508         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5509                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5510                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5511                 amdgpu_ring_write(ring, 0x0);
5512         }
5513 }
5514
5515 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5516 {
5517         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5518 }
5519
5520 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5521 {
5522         u64 wptr;
5523
5524         /* XXX check if swapping is necessary on BE */
5525         if (ring->use_doorbell)
5526                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5527         else
5528                 BUG();
5529         return wptr;
5530 }
5531
5532 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5533 {
5534         struct amdgpu_device *adev = ring->adev;
5535
5536         /* XXX check if swapping is necessary on BE */
5537         if (ring->use_doorbell) {
5538                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5539                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5540         } else {
5541                 BUG(); /* only DOORBELL method supported on gfx9 now */
5542         }
5543 }
5544
5545 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5546                                          u64 seq, unsigned int flags)
5547 {
5548         struct amdgpu_device *adev = ring->adev;
5549
5550         /* we only allocate 32bit for each seq wb address */
5551         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5552
5553         /* write fence seq to the "addr" */
5554         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5555         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5556                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5557         amdgpu_ring_write(ring, lower_32_bits(addr));
5558         amdgpu_ring_write(ring, upper_32_bits(addr));
5559         amdgpu_ring_write(ring, lower_32_bits(seq));
5560
5561         if (flags & AMDGPU_FENCE_FLAG_INT) {
5562                 /* set register to trigger INT */
5563                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5564                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5565                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5566                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5567                 amdgpu_ring_write(ring, 0);
5568                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5569         }
5570 }
5571
5572 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5573 {
5574         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5575         amdgpu_ring_write(ring, 0);
5576 }
5577
5578 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5579 {
5580         struct amdgpu_device *adev = ring->adev;
5581         struct v9_ce_ib_state ce_payload = {0};
5582         uint64_t offset, ce_payload_gpu_addr;
5583         void *ce_payload_cpu_addr;
5584         int cnt;
5585
5586         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5587
5588         if (ring->is_mes_queue) {
5589                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5590                                   gfx[0].gfx_meta_data) +
5591                         offsetof(struct v9_gfx_meta_data, ce_payload);
5592                 ce_payload_gpu_addr =
5593                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5594                 ce_payload_cpu_addr =
5595                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5596         } else {
5597                 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5598                 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5599                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5600         }
5601
5602         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5603         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5604                                  WRITE_DATA_DST_SEL(8) |
5605                                  WR_CONFIRM) |
5606                                  WRITE_DATA_CACHE_POLICY(0));
5607         amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5608         amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5609
5610         amdgpu_ring_ib_on_emit_ce(ring);
5611
5612         if (resume)
5613                 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5614                                            sizeof(ce_payload) >> 2);
5615         else
5616                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5617                                            sizeof(ce_payload) >> 2);
5618 }
5619
5620 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5621 {
5622         int i, r = 0;
5623         struct amdgpu_device *adev = ring->adev;
5624         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5625         struct amdgpu_ring *kiq_ring = &kiq->ring;
5626         unsigned long flags;
5627
5628         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5629                 return -EINVAL;
5630
5631         spin_lock_irqsave(&kiq->ring_lock, flags);
5632
5633         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5634                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5635                 return -ENOMEM;
5636         }
5637
5638         /* assert preemption condition */
5639         amdgpu_ring_set_preempt_cond_exec(ring, false);
5640
5641         ring->trail_seq += 1;
5642         amdgpu_ring_alloc(ring, 13);
5643         gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5644                                  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5645
5646         /* assert IB preemption, emit the trailing fence */
5647         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5648                                    ring->trail_fence_gpu_addr,
5649                                    ring->trail_seq);
5650
5651         amdgpu_ring_commit(kiq_ring);
5652         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5653
5654         /* poll the trailing fence */
5655         for (i = 0; i < adev->usec_timeout; i++) {
5656                 if (ring->trail_seq ==
5657                         le32_to_cpu(*ring->trail_fence_cpu_addr))
5658                         break;
5659                 udelay(1);
5660         }
5661
5662         if (i >= adev->usec_timeout) {
5663                 r = -EINVAL;
5664                 DRM_WARN("ring %d timed out preempting IB\n", ring->idx);
5665         }
5666
5667         /* reset the CP_VMID_PREEMPT after the trailing fence */
5668         amdgpu_ring_emit_wreg(ring,
5669                               SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5670                               0x0);
5671         amdgpu_ring_commit(ring);
5672
5673         /* deassert preemption condition */
5674         amdgpu_ring_set_preempt_cond_exec(ring, true);
5675         return r;
5676 }
5677
5678 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5679 {
5680         struct amdgpu_device *adev = ring->adev;
5681         struct v9_de_ib_state de_payload = {0};
5682         uint64_t offset, gds_addr, de_payload_gpu_addr;
5683         void *de_payload_cpu_addr;
5684         int cnt;
5685
5686         if (ring->is_mes_queue) {
5687                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5688                                   gfx[0].gfx_meta_data) +
5689                         offsetof(struct v9_gfx_meta_data, de_payload);
5690                 de_payload_gpu_addr =
5691                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5692                 de_payload_cpu_addr =
5693                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5694
5695                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5696                                   gfx[0].gds_backup) +
5697                         offsetof(struct v9_gfx_meta_data, de_payload);
5698                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5699         } else {
5700                 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5701                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5702                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5703
5704                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5705                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5706                                  PAGE_SIZE);
5707         }
5708
5709         if (usegds) {
5710                 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5711                 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5712         }
5713
5714         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5715         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5716         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5717                                  WRITE_DATA_DST_SEL(8) |
5718                                  WR_CONFIRM) |
5719                                  WRITE_DATA_CACHE_POLICY(0));
5720         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5721         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5722
5723         amdgpu_ring_ib_on_emit_de(ring);
5724         if (resume)
5725                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5726                                            sizeof(de_payload) >> 2);
5727         else
5728                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5729                                            sizeof(de_payload) >> 2);
5730 }
5731
5732 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5733                                    bool secure)
5734 {
5735         uint32_t v = secure ? FRAME_TMZ : 0;
5736
5737         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5738         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5739 }
5740
5741 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5742 {
5743         uint32_t dw2 = 0;
5744
5745         gfx_v9_0_ring_emit_ce_meta(ring,
5746                                    (!amdgpu_sriov_vf(ring->adev) &&
5747                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
5748
5749         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5750         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5751                 /* set load_global_config & load_global_uconfig */
5752                 dw2 |= 0x8001;
5753                 /* set load_cs_sh_regs */
5754                 dw2 |= 0x01000000;
5755                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5756                 dw2 |= 0x10002;
5757
5758                 /* set load_ce_ram if preamble is present */
5759                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5760                         dw2 |= 0x10000000;
5761         } else {
5762                 /* still load_ce_ram if this is the first time the preamble is
5763                  * presented, even though no context switch happens.
5764                  */
5765                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5766                         dw2 |= 0x10000000;
5767         }
5768
5769         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5770         amdgpu_ring_write(ring, dw2);
5771         amdgpu_ring_write(ring, 0);
5772 }
5773
5774 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5775                                                   uint64_t addr)
5776 {
5777         unsigned ret;
5778         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5779         amdgpu_ring_write(ring, lower_32_bits(addr));
5780         amdgpu_ring_write(ring, upper_32_bits(addr));
5781         /* discard following DWs if *cond_exec_gpu_addr==0 */
5782         amdgpu_ring_write(ring, 0);
5783         ret = ring->wptr & ring->buf_mask;
5784         /* patch dummy value later */
5785         amdgpu_ring_write(ring, 0);
5786         return ret;
5787 }
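
/*
 * The returned offset marks the COND_EXEC size DW that is still a dummy 0
 * here. The caller is expected to remember it and, once the conditionally
 * executed packets have been emitted, patch that ring DW with the real DW
 * count so the CP knows how many DWs to skip when *cond_exec_gpu_addr == 0.
 * Roughly (sketch only; helper names assumed from amdgpu_ring.h):
 *
 *      offs = amdgpu_ring_init_cond_exec(ring, addr);
 *      ... emit the conditionally executed packets ...
 *      amdgpu_ring_patch_cond_exec(ring, offs);
 */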
5788
5789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5790                                     uint32_t reg_val_offs)
5791 {
5792         struct amdgpu_device *adev = ring->adev;
5793
5794         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5795         amdgpu_ring_write(ring, 0 |     /* src: register */
5796                                 (5 << 8) |      /* dst: memory */
5797                                 (1 << 20));     /* write confirm */
5798         amdgpu_ring_write(ring, reg);
5799         amdgpu_ring_write(ring, 0);
5800         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5801                                 reg_val_offs * 4));
5802         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5803                                 reg_val_offs * 4));
5804 }
5805
5806 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5807                                     uint32_t val)
5808 {
5809         uint32_t cmd = 0;
5810
5811         switch (ring->funcs->type) {
5812         case AMDGPU_RING_TYPE_GFX:
5813                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5814                 break;
5815         case AMDGPU_RING_TYPE_KIQ:
5816                 cmd = (1 << 16); /* no inc addr */
5817                 break;
5818         default:
5819                 cmd = WR_CONFIRM;
5820                 break;
5821         }
5822         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823         amdgpu_ring_write(ring, cmd);
5824         amdgpu_ring_write(ring, reg);
5825         amdgpu_ring_write(ring, 0);
5826         amdgpu_ring_write(ring, val);
5827 }
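
/*
 * The five DWs emitted above form one PM4 WRITE_DATA packet. For example,
 * the CP_VMID_PREEMPT clear issued from gfx_v9_0_ring_preempt_ib() on a GFX
 * ring lands in the ring as (illustrative layout):
 *
 *      PACKET3(PACKET3_WRITE_DATA, 3)
 *      WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM              // control
 *      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT)         // dst reg offset
 *      0                                                  // addr hi (unused for registers)
 *      0x0                                                // value
 */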
5828
5829 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5830                                         uint32_t val, uint32_t mask)
5831 {
5832         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5833 }
5834
5835 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5836                                                   uint32_t reg0, uint32_t reg1,
5837                                                   uint32_t ref, uint32_t mask)
5838 {
5839         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5840         struct amdgpu_device *adev = ring->adev;
5841         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5842                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5843
5844         if (fw_version_ok)
5845                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5846                                       ref, mask, 0x20);
5847         else
5848                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5849                                                            ref, mask);
5850 }
5851
5852 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5853 {
5854         struct amdgpu_device *adev = ring->adev;
5855         uint32_t value = 0;
5856
5857         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5858         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5859         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5860         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5861         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5862 }
5863
5864 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5865                                                  enum amdgpu_interrupt_state state)
5866 {
5867         switch (state) {
5868         case AMDGPU_IRQ_STATE_DISABLE:
5869         case AMDGPU_IRQ_STATE_ENABLE:
5870                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5871                                TIME_STAMP_INT_ENABLE,
5872                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5873                 break;
5874         default:
5875                 break;
5876         }
5877 }
5878
5879 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5880                                                      int me, int pipe,
5881                                                      enum amdgpu_interrupt_state state)
5882 {
5883         u32 mec_int_cntl, mec_int_cntl_reg;
5884
5885         /*
5886          * amdgpu controls only the first MEC. That's why this function only
5887          * handles the setting of interrupts for this specific MEC. All other
5888          * pipes' interrupts are set by amdkfd.
5889          */
5890
5891         if (me == 1) {
5892                 switch (pipe) {
5893                 case 0:
5894                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5895                         break;
5896                 case 1:
5897                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5898                         break;
5899                 case 2:
5900                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5901                         break;
5902                 case 3:
5903                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5904                         break;
5905                 default:
5906                         DRM_DEBUG("invalid pipe %d\n", pipe);
5907                         return;
5908                 }
5909         } else {
5910                 DRM_DEBUG("invalid me %d\n", me);
5911                 return;
5912         }
5913
5914         switch (state) {
5915         case AMDGPU_IRQ_STATE_DISABLE:
5916                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5917                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5918                                              TIME_STAMP_INT_ENABLE, 0);
5919                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5920                 break;
5921         case AMDGPU_IRQ_STATE_ENABLE:
5922                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5923                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5924                                              TIME_STAMP_INT_ENABLE, 1);
5925                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5926                 break;
5927         default:
5928                 break;
5929         }
5930 }
5931
5932 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5933                                              struct amdgpu_irq_src *source,
5934                                              unsigned type,
5935                                              enum amdgpu_interrupt_state state)
5936 {
5937         switch (state) {
5938         case AMDGPU_IRQ_STATE_DISABLE:
5939         case AMDGPU_IRQ_STATE_ENABLE:
5940                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5941                                PRIV_REG_INT_ENABLE,
5942                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5943                 break;
5944         default:
5945                 break;
5946         }
5947
5948         return 0;
5949 }
5950
5951 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5952                                               struct amdgpu_irq_src *source,
5953                                               unsigned type,
5954                                               enum amdgpu_interrupt_state state)
5955 {
5956         switch (state) {
5957         case AMDGPU_IRQ_STATE_DISABLE:
5958         case AMDGPU_IRQ_STATE_ENABLE:
5959                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5960                                PRIV_INSTR_INT_ENABLE,
5961                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5962                 break;
5963         default:
5964                 break;
5965         }
5966
5967         return 0;
5968 }
5969
5970 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5971         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5972                         CP_ECC_ERROR_INT_ENABLE, 1)
5973
5974 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5975         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5976                         CP_ECC_ERROR_INT_ENABLE, 0)
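/*
 * For example, ENABLE_ECC_ON_ME_PIPE(1, 0) expands by token pasting to
 * WREG32_FIELD15(GC, 0, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 1),
 * i.e. it selects the interrupt control register of MEC1 pipe 0.
 */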
5977
5978 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5979                                               struct amdgpu_irq_src *source,
5980                                               unsigned type,
5981                                               enum amdgpu_interrupt_state state)
5982 {
5983         switch (state) {
5984         case AMDGPU_IRQ_STATE_DISABLE:
5985                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5986                                 CP_ECC_ERROR_INT_ENABLE, 0);
5987                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5988                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5989                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5990                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5991                 break;
5992
5993         case AMDGPU_IRQ_STATE_ENABLE:
5994                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5995                                 CP_ECC_ERROR_INT_ENABLE, 1);
5996                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5997                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5998                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5999                 ENABLE_ECC_ON_ME_PIPE(1, 3);
6000                 break;
6001         default:
6002                 break;
6003         }
6004
6005         return 0;
6006 }
6007
6008
6009 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6010                                             struct amdgpu_irq_src *src,
6011                                             unsigned type,
6012                                             enum amdgpu_interrupt_state state)
6013 {
6014         switch (type) {
6015         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6016                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6017                 break;
6018         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6019                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6020                 break;
6021         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6022                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6023                 break;
6024         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6025                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6026                 break;
6027         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6028                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6029                 break;
6030         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6031                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6032                 break;
6033         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6034                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6035                 break;
6036         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6037                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6038                 break;
6039         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6040                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6041                 break;
6042         default:
6043                 break;
6044         }
6045         return 0;
6046 }
6047
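/*
 * EOP interrupt handler.  The IH ring_id encodes the source queue:
 *   me_id    = ring_id[3:2]
 *   pipe_id  = ring_id[1:0]
 *   queue_id = ring_id[6:4]
 * me 0 is the gfx ring (or the software rings when mid-command-buffer
 * preemption is in use); me 1/2 are matched against the compute rings.
 */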
6048 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6049                             struct amdgpu_irq_src *source,
6050                             struct amdgpu_iv_entry *entry)
6051 {
6052         int i;
6053         u8 me_id, pipe_id, queue_id;
6054         struct amdgpu_ring *ring;
6055
6056         DRM_DEBUG("IH: CP EOP\n");
6057         me_id = (entry->ring_id & 0x0c) >> 2;
6058         pipe_id = (entry->ring_id & 0x03) >> 0;
6059         queue_id = (entry->ring_id & 0x70) >> 4;
6060
6061         switch (me_id) {
6062         case 0:
6063                 if (adev->gfx.num_gfx_rings) {
6064                         if (!adev->gfx.mcbp) {
6065                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6066                         } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6067                                 /* Fence signals are handled on the software rings */
6068                                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6069                                         amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6070                         }
6071                 }
6072                 break;
6073         case 1:
6074         case 2:
6075                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6076                         ring = &adev->gfx.compute_ring[i];
6077                         /* Per-queue interrupt is supported for MEC starting from VI.
6078                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6079                          */
6080                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6081                                 amdgpu_fence_process(ring);
6082                 }
6083                 break;
6084         }
6085         return 0;
6086 }
6087
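/*
 * Common fault handler for privileged register/instruction errors: decode
 * the source queue from ring_id (same layout as in the EOP handler) and
 * report the fault to the DRM scheduler of the matching ring so the
 * offending job goes through the normal recovery path.
 */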
6088 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6089                            struct amdgpu_iv_entry *entry)
6090 {
6091         u8 me_id, pipe_id, queue_id;
6092         struct amdgpu_ring *ring;
6093         int i;
6094
6095         me_id = (entry->ring_id & 0x0c) >> 2;
6096         pipe_id = (entry->ring_id & 0x03) >> 0;
6097         queue_id = (entry->ring_id & 0x70) >> 4;
6098
6099         switch (me_id) {
6100         case 0:
6101                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6102                 break;
6103         case 1:
6104         case 2:
6105                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6106                         ring = &adev->gfx.compute_ring[i];
6107                         if (ring->me == me_id && ring->pipe == pipe_id &&
6108                             ring->queue == queue_id)
6109                                 drm_sched_fault(&ring->sched);
6110                 }
6111                 break;
6112         }
6113 }
6114
6115 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6116                                  struct amdgpu_irq_src *source,
6117                                  struct amdgpu_iv_entry *entry)
6118 {
6119         DRM_ERROR("Illegal register access in command stream\n");
6120         gfx_v9_0_fault(adev, entry);
6121         return 0;
6122 }
6123
6124 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6125                                   struct amdgpu_irq_src *source,
6126                                   struct amdgpu_iv_entry *entry)
6127 {
6128         DRM_ERROR("Illegal instruction in command stream\n");
6129         gfx_v9_0_fault(adev, entry);
6130         return 0;
6131 }
6132
6133
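/*
 * RAS/EDC counter description table: each entry names a GFX sub-block, the
 * counter register it is reported in, and the SEC (correctable) and DED
 * (uncorrectable) bit-fields within that register.  Sub-blocks that only
 * report single-error-detect (SED) counts leave the DED mask/shift as 0, 0.
 */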
6134 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6135         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6136           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6137           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6138         },
6139         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6140           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6141           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6142         },
6143         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6144           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6145           0, 0
6146         },
6147         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6148           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6149           0, 0
6150         },
6151         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6152           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6153           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6154         },
6155         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6156           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6157           0, 0
6158         },
6159         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6160           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6161           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6162         },
6163         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6164           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6165           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6166         },
6167         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6168           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6169           0, 0
6170         },
6171         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6172           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6173           0, 0
6174         },
6175         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6176           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6177           0, 0
6178         },
6179         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6180           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6181           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6182         },
6183         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6184           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6185           0, 0
6186         },
6187         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6188           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6189           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6190         },
6191         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6192           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6193           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6194           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6195         },
6196         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6197           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6198           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6199           0, 0
6200         },
6201         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6202           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6203           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6204           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6205         },
6206         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6207           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6208           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6209           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6210         },
6211         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6212           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6213           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6214           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6215         },
6216         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6217           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6218           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6219           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6220         },
6221         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6222           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6223           0, 0
6224         },
6225         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6226           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6227           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6228         },
6229         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6230           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6231           0, 0
6232         },
6233         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6234           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6235           0, 0
6236         },
6237         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6238           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6239           0, 0
6240         },
6241         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6242           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6243           0, 0
6244         },
6245         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6246           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6247           0, 0
6248         },
6249         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6250           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6251           0, 0
6252         },
6253         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6254           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6255           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6256         },
6257         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6258           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6259           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6260         },
6261         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6262           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6263           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6264         },
6265         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6266           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6267           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6268         },
6269         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6270           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6271           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6272         },
6273         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6274           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6275           0, 0
6276         },
6277         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6278           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6279           0, 0
6280         },
6281         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6282           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6283           0, 0
6284         },
6285         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6286           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6287           0, 0
6288         },
6289         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6290           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6291           0, 0
6292         },
6293         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6294           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6295           0, 0
6296         },
6297         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6298           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6299           0, 0
6300         },
6301         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6302           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6303           0, 0
6304         },
6305         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6306           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6307           0, 0
6308         },
6309         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6310           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6311           0, 0
6312         },
6313         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6314           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6315           0, 0
6316         },
6317         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6318           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6319           0, 0
6320         },
6321         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6322           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6323           0, 0
6324         },
6325         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6326           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6327           0, 0
6328         },
6329         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6330           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6331           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6332         },
6333         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6334           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6335           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6336         },
6337         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6338           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6339           0, 0
6340         },
6341         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6342           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6343           0, 0
6344         },
6345         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6346           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6347           0, 0
6348         },
6349         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6350           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6351           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6352         },
6353         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6354           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6355           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6356         },
6357         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6358           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6359           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6360         },
6361         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6362           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6363           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6364         },
6365         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6366           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6367           0, 0
6368         },
6369         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6370           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6371           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6372         },
6373         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6374           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6375           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6376         },
6377         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6378           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6379           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6380         },
6381         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6382           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6383           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6384         },
6385         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6386           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6387           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6388         },
6389         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6390           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6391           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6392         },
6393         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6394           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6395           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6396         },
6397         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6398           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6399           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6400         },
6401         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6402           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6403           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6404         },
6405         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6406           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6407           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6408         },
6409         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6410           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6411           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6412         },
6413         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6414           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6415           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6416         },
6417         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6418           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6419           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6420         },
6421         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6422           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6423           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6424         },
6425         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6426           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6427           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6428         },
6429         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6430           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6431           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6432         },
6433         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6434           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6435           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6436         },
6437         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6438           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6439           0, 0
6440         },
6441         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6442           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6443           0, 0
6444         },
6445         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6446           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6447           0, 0
6448         },
6449         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6450           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6451           0, 0
6452         },
6453         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6454           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6455           0, 0
6456         },
6457         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6458           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6459           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6460         },
6461         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6462           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6463           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6464         },
6465         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6466           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6467           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6468         },
6469         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6470           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6471           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6472         },
6473         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6474           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6475           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6476         },
6477         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6478           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6479           0, 0
6480         },
6481         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6482           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6483           0, 0
6484         },
6485         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6486           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6487           0, 0
6488         },
6489         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6490           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6491           0, 0
6492         },
6493         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6494           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6495           0, 0
6496         },
6497         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6498           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6499           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6500         },
6501         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6502           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6503           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6504         },
6505         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6506           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6507           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6508         },
6509         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6510           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6511           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6512         },
6513         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6514           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6515           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6516         },
6517         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6518           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6519           0, 0
6520         },
6521         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6522           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6523           0, 0
6524         },
6525         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6526           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6527           0, 0
6528         },
6529         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6530           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6531           0, 0
6532         },
6533         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6534           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6535           0, 0
6536         },
6537         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6538           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6539           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6540         },
6541         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6542           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6543           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6544         },
6545         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6546           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6547           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6548         },
6549         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6550           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6551           0, 0
6552         },
6553         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6554           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6555           0, 0
6556         },
6557         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6558           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6559           0, 0
6560         },
6561         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6562           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6563           0, 0
6564         },
6565         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6566           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6567           0, 0
6568         },
6569         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6570           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6571           0, 0
6572         }
6573 };
6574
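/*
 * RAS error injection: validate the requested sub-block against the
 * ras_gfx_subblocks table (defined earlier in this file), translate the
 * request into a TA trigger-error input and hand it to the PSP, holding
 * grbm_idx_mutex to serialize with other GRBM-indexed register access.
 */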
6575 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6576                                      void *inject_if, uint32_t instance_mask)
6577 {
6578         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6579         int ret;
6580         struct ta_ras_trigger_error_input block_info = { 0 };
6581
6582         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6583                 return -EINVAL;
6584
6585         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6586                 return -EINVAL;
6587
6588         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6589                 return -EPERM;
6590
6591         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6592               info->head.type)) {
6593                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6594                         ras_gfx_subblocks[info->head.sub_block_index].name,
6595                         info->head.type);
6596                 return -EPERM;
6597         }
6598
6599         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6600               info->head.type)) {
6601                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6602                         ras_gfx_subblocks[info->head.sub_block_index].name,
6603                         info->head.type);
6604                 return -EPERM;
6605         }
6606
6607         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6608         block_info.sub_block_index =
6609                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6610         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6611         block_info.address = info->address;
6612         block_info.value = info->value;
6613
6614         mutex_lock(&adev->grbm_idx_mutex);
6615         ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6616         mutex_unlock(&adev->grbm_idx_mutex);
6617
6618         return ret;
6619 }
6620
6621 static const char * const vml2_mems[] = {
6622         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6623         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6624         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6625         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6626         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6627         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6628         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6629         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6630         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6631         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6632         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6633         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6634         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6635         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6636         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6637         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6638 };
6639
6640 static const char * const vml2_walker_mems[] = {
6641         "UTC_VML2_CACHE_PDE0_MEM0",
6642         "UTC_VML2_CACHE_PDE0_MEM1",
6643         "UTC_VML2_CACHE_PDE1_MEM0",
6644         "UTC_VML2_CACHE_PDE1_MEM1",
6645         "UTC_VML2_CACHE_PDE2_MEM0",
6646         "UTC_VML2_CACHE_PDE2_MEM1",
6647         "UTC_VML2_RDIF_LOG_FIFO",
6648 };
6649
6650 static const char * const atc_l2_cache_2m_mems[] = {
6651         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6652         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6653         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6654         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6655 };
6656
6657 static const char * const atc_l2_cache_4k_mems[] = {
6658         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6659         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6660         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6661         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6662         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6663         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6664         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6665         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6666         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6667         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6668         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6669         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6670         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6671         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6672         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6673         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6674         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6675         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6676         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6677         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6678         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6679         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6680         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6681         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6682         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6683         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6684         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6685         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6686         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6687         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6688         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6689         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6690 };
6691
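/*
 * UTC (VML2/ATC L2) EDC status: each *_INDEX register selects one memory
 * instance from the name tables above, then the matching *_CNT register is
 * read for its SEC/DED counts.  The ATC L2 2M/4K counters are decoded with
 * raw masks here (SEC in bits 14:13, DED in bits 16:15) instead of
 * REG_GET_FIELD helpers.
 */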
6692 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6693                                          struct ras_err_data *err_data)
6694 {
6695         uint32_t i, data;
6696         uint32_t sec_count, ded_count;
6697
6698         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6699         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6700         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6701         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6702         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6703         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6704         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6705         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6706
6707         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6708                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6709                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6710
6711                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6712                 if (sec_count) {
6713                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6714                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6715                         err_data->ce_count += sec_count;
6716                 }
6717
6718                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6719                 if (ded_count) {
6720                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6721                                 "DED %d\n", i, vml2_mems[i], ded_count);
6722                         err_data->ue_count += ded_count;
6723                 }
6724         }
6725
6726         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6727                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6728                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6729
6730                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6731                                                 SEC_COUNT);
6732                 if (sec_count) {
6733                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6734                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6735                         err_data->ce_count += sec_count;
6736                 }
6737
6738                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6739                                                 DED_COUNT);
6740                 if (ded_count) {
6741                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6742                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6743                         err_data->ue_count += ded_count;
6744                 }
6745         }
6746
6747         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6748                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6749                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6750
6751                 sec_count = (data & 0x00006000L) >> 0xd;
6752                 if (sec_count) {
6753                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6754                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6755                                 sec_count);
6756                         err_data->ce_count += sec_count;
6757                 }
6758         }
6759
6760         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6761                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6762                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6763
6764                 sec_count = (data & 0x00006000L) >> 0xd;
6765                 if (sec_count) {
6766                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6767                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6768                                 sec_count);
6769                         err_data->ce_count += sec_count;
6770                 }
6771
6772                 ded_count = (data & 0x00018000L) >> 0xf;
6773                 if (ded_count) {
6774                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6775                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6776                                 ded_count);
6777                         err_data->ue_count += ded_count;
6778                 }
6779         }
6780
6781         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6782         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6783         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6784         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6785
6786         return 0;
6787 }
6788
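/*
 * Decode one raw EDC counter value: walk gfx_v9_0_ras_fields[], pick the
 * entries that describe this register, extract their SEC/DED bit-fields
 * and accumulate the results into *sec_count / *ded_count, logging every
 * non-zero sub-block count.
 */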
6789 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6790         const struct soc15_reg_entry *reg,
6791         uint32_t se_id, uint32_t inst_id, uint32_t value,
6792         uint32_t *sec_count, uint32_t *ded_count)
6793 {
6794         uint32_t i;
6795         uint32_t sec_cnt, ded_cnt;
6796
6797         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6798                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6799                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6800                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6801                         continue;
6802
6803                 sec_cnt = (value &
6804                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6805                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6806                 if (sec_cnt) {
6807                         dev_info(adev->dev, "GFX SubBlock %s, "
6808                                 "Instance[%d][%d], SEC %d\n",
6809                                 gfx_v9_0_ras_fields[i].name,
6810                                 se_id, inst_id,
6811                                 sec_cnt);
6812                         *sec_count += sec_cnt;
6813                 }
6814
6815                 ded_cnt = (value &
6816                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6817                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6818                 if (ded_cnt) {
6819                         dev_info(adev->dev, "GFX SubBlock %s, "
6820                                 "Instance[%d][%d], DED %d\n",
6821                                 gfx_v9_0_ras_fields[i].name,
6822                                 se_id, inst_id,
6823                                 ded_cnt);
6824                         *ded_count += ded_cnt;
6825                 }
6826         }
6827
6828         return 0;
6829 }
6830
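/*
 * The EDC counters are cleared on read, so resetting them is done by
 * selecting every SE/instance combination, reading each counter register
 * back and then restoring GRBM_GFX_INDEX to broadcast mode (0xe0000000).
 */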
6831 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6832 {
6833         int i, j, k;
6834
6835         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6836                 return;
6837
6838         /* read back registers to clear the counters */
6839         mutex_lock(&adev->grbm_idx_mutex);
6840         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6841                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6842                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6843                                 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6844                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6845                         }
6846                 }
6847         }
6848         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6849         mutex_unlock(&adev->grbm_idx_mutex);
6850
6851         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6852         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6853         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6854         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6855         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6856         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6857         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6858         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6859
6860         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6861                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6862                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6863         }
6864
6865         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6866                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6867                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6868         }
6869
6870         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6871                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6872                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6873         }
6874
6875         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6876                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6877                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6878         }
6879
6880         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6881         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6882         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6883         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6884 }
6885
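/*
 * Query the accumulated RAS error counts: under grbm_idx_mutex, read every
 * EDC counter register for every SE/instance, fold non-zero values through
 * gfx_v9_0_ras_error_count(), restore broadcast register access and then
 * add the UTC (VML2/ATC L2) counts on top.
 */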
6886 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6887                                           void *ras_error_status)
6888 {
6889         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6890         uint32_t sec_count = 0, ded_count = 0;
6891         uint32_t i, j, k;
6892         uint32_t reg_value;
6893
6894         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6895                 return;
6896
6897         err_data->ue_count = 0;
6898         err_data->ce_count = 0;
6899
6900         mutex_lock(&adev->grbm_idx_mutex);
6901
6902         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6903                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6904                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6905                                 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6906                                 reg_value =
6907                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6908                                 if (reg_value)
6909                                         gfx_v9_0_ras_error_count(adev,
6910                                                 &gfx_v9_0_edc_counter_regs[i],
6911                                                 j, k, reg_value,
6912                                                 &sec_count, &ded_count);
6913                         }
6914                 }
6915         }
6916
6917         err_data->ce_count += sec_count;
6918         err_data->ue_count += ded_count;
6919
6920         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6921         mutex_unlock(&adev->grbm_idx_mutex);
6922
6923         gfx_v9_0_query_utc_edc_status(adev, err_data);
6924 }
6925
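/*
 * Emit a full cache flush/invalidate: a single ACQUIRE_MEM packet covering
 * the whole address range (CP_COHER_SIZE all ones, base 0) with the shader
 * I$/K$, TC and TCL1 action bits set, so that data produced by earlier work
 * is coherent before the following submission runs.
 */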
6926 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6927 {
6928         const unsigned int cp_coher_cntl =
6929                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6930                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6931                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6932                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6933                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6934
6935         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6936         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6937         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6938         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6939         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6940         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6941         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6942         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6943 }
6944
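/*
 * Per-pipe compute wave limit: when enabled, SPI_WCL_PIPE_PERCENT_CS<pipe>
 * is dropped to the minimum multiplier (0x1) to throttle that pipe; when
 * disabled it is restored to the hardware default value.
 */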
6945 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6946                                         uint32_t pipe, bool enable)
6947 {
6948         struct amdgpu_device *adev = ring->adev;
6949         uint32_t val;
6950         uint32_t wcl_cs_reg;
6951
6952         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6953         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6954
6955         switch (pipe) {
6956         case 0:
6957                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6958                 break;
6959         case 1:
6960                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6961                 break;
6962         case 2:
6963                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6964                 break;
6965         case 3:
6966                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6967                 break;
6968         default:
6969                 DRM_DEBUG("invalid pipe %d\n", pipe);
6970                 return;
6971         }
6972
6973         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6974 }
6975
6976 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6977 {
6978         struct amdgpu_device *adev = ring->adev;
6979         uint32_t val;
6980         int i;
6981
6982
6983         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7 bit multiplier register used to
6984          * limit the number of gfx waves. Setting it to 0x1f (5 bits set) makes
6985          * sure gfx only gets around 25% of the gpu resources.
6986          */
6987         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6988         amdgpu_ring_emit_wreg(ring,
6989                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6990                               val);
6991
6992         /* Restrict waves for normal/low priority compute queues as well
6993          * to get best QoS for high priority compute jobs.
6994          *
6995          * amdgpu controls only the 1st ME (CS pipes 0-3).
6996          */
6997         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6998                 if (i != ring->pipe)
6999                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7000
7001         }
7002 }
7003
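/*
 * print_ip_state callback: pretty-print the register state captured by
 * gfx_v9_ip_dump(), first the core GC register list and then the per-queue
 * CP registers for every mec/pipe/queue instance, in the order they were
 * dumped.
 */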
7004 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7005 {
7006         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7007         uint32_t i, j, k, reg, index = 0;
7008         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7009
7010         if (!adev->gfx.ip_dump_core)
7011                 return;
7012
7013         for (i = 0; i < reg_count; i++)
7014                 drm_printf(p, "%-50s \t 0x%08x\n",
7015                            gc_reg_list_9[i].reg_name,
7016                            adev->gfx.ip_dump_core[i]);
7017
7018         /* print compute queue registers for all instances */
7019         if (!adev->gfx.ip_dump_compute_queues)
7020                 return;
7021
7022         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7023         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7024                    adev->gfx.mec.num_mec,
7025                    adev->gfx.mec.num_pipe_per_mec,
7026                    adev->gfx.mec.num_queue_per_pipe);
7027
7028         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7029                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7030                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7031                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7032                                 for (reg = 0; reg < reg_count; reg++) {
7033                                         drm_printf(p, "%-50s \t 0x%08x\n",
7034                                                    gc_cp_reg_list_9[reg].reg_name,
7035                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
7036                                 }
7037                                 index += reg_count;
7038                         }
7039                 }
7040         }
7041
7042 }
7043
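/*
 * dump_ip_state callback: snapshot the core GC registers and, if the buffer
 * was allocated, the per-queue CP registers.  GFXOFF is disabled around the
 * reads so the registers stay accessible, and each compute queue is
 * selected via soc15_grbm_select() (ME index 1 + i, since ME0 is the gfx
 * engine) while holding srbm_mutex.
 */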
7044 static void gfx_v9_ip_dump(void *handle)
7045 {
7046         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7047         uint32_t i, j, k, reg, index = 0;
7048         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7049
7050         if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7051                 return;
7052
7053         amdgpu_gfx_off_ctrl(adev, false);
7054         for (i = 0; i < reg_count; i++)
7055                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7056         amdgpu_gfx_off_ctrl(adev, true);
7057
7058         /* dump compute queue registers for all instances */
7059         if (!adev->gfx.ip_dump_compute_queues)
7060                 return;
7061
7062         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7063         amdgpu_gfx_off_ctrl(adev, false);
7064         mutex_lock(&adev->srbm_mutex);
7065         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7066                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7067                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7068                                 /* ME0 is for GFX so start from 1 for CP */
7069                                 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7070
7071                                 for (reg = 0; reg < reg_count; reg++) {
7072                                         adev->gfx.ip_dump_compute_queues[index + reg] =
7073                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
7074                                                         gc_cp_reg_list_9[reg]));
7075                                 }
7076                                 index += reg_count;
7077                         }
7078                 }
7079         }
7080         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7081         mutex_unlock(&adev->srbm_mutex);
7082         amdgpu_gfx_off_ctrl(adev, true);
7083
7084 }
7085
7086 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7087         .name = "gfx_v9_0",
7088         .early_init = gfx_v9_0_early_init,
7089         .late_init = gfx_v9_0_late_init,
7090         .sw_init = gfx_v9_0_sw_init,
7091         .sw_fini = gfx_v9_0_sw_fini,
7092         .hw_init = gfx_v9_0_hw_init,
7093         .hw_fini = gfx_v9_0_hw_fini,
7094         .suspend = gfx_v9_0_suspend,
7095         .resume = gfx_v9_0_resume,
7096         .is_idle = gfx_v9_0_is_idle,
7097         .wait_for_idle = gfx_v9_0_wait_for_idle,
7098         .soft_reset = gfx_v9_0_soft_reset,
7099         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7100         .set_powergating_state = gfx_v9_0_set_powergating_state,
7101         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7102         .dump_ip_state = gfx_v9_ip_dump,
7103         .print_ip_state = gfx_v9_ip_print,
7104 };
7105
7106 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7107         .type = AMDGPU_RING_TYPE_GFX,
7108         .align_mask = 0xff,
7109         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7110         .support_64bit_ptrs = true,
7111         .secure_submission_supported = true,
7112         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7113         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7114         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7115         .emit_frame_size = /* 242 maximum in total, if 16 IBs */
7116                 5 +  /* COND_EXEC */
7117                 7 +  /* PIPELINE_SYNC */
7118                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7119                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7120                 2 + /* VM_FLUSH */
7121                 8 +  /* FENCE for VM_FLUSH */
7122                 20 + /* GDS switch */
7123                 4 + /* double SWITCH_BUFFER,
7124                      * the first COND_EXEC jumps to the place just
7125                      * prior to this double SWITCH_BUFFER */
7126                 5 + /* COND_EXEC */
7127                 7 + /* HDP_flush */
7128                 4 + /* VGT_flush */
7129                 14 + /* CE_META */
7130                 31 + /* DE_META */
7131                 3 + /* CNTX_CTRL */
7132                 5 + /* HDP_INVL */
7133                 8 + 8 + /* FENCE x2 */
7134                 2 + /* SWITCH_BUFFER */
7135                 7, /* gfx_v9_0_emit_mem_sync */
7136         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7137         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7138         .emit_fence = gfx_v9_0_ring_emit_fence,
7139         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7140         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7141         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7142         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7143         .test_ring = gfx_v9_0_ring_test_ring,
7144         .insert_nop = amdgpu_ring_insert_nop,
7145         .pad_ib = amdgpu_ring_generic_pad_ib,
7146         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7147         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7148         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7149         .preempt_ib = gfx_v9_0_ring_preempt_ib,
7150         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7151         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7152         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7153         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7154         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7155         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7156 };
7157
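/* Ring callbacks for the software GFX rings that are multiplexed onto the
 * hardware ring; these are only wired up when mid-command-buffer preemption
 * (mcbp) is enabled, see gfx_v9_0_set_ring_funcs(). */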
7158 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7159         .type = AMDGPU_RING_TYPE_GFX,
7160         .align_mask = 0xff,
7161         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7162         .support_64bit_ptrs = true,
7163         .secure_submission_supported = true,
7164         .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7165         .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7166         .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7167         .emit_frame_size = /* 242 maximum in total, if 16 IBs */
7168                 5 +  /* COND_EXEC */
7169                 7 +  /* PIPELINE_SYNC */
7170                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7171                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7172                 2 + /* VM_FLUSH */
7173                 8 +  /* FENCE for VM_FLUSH */
7174                 20 + /* GDS switch */
7175                 4 + /* double SWITCH_BUFFER,
7176                      * the first COND_EXEC jumps to the place just
7177                      * prior to this double SWITCH_BUFFER
7178                      */
7179                 5 + /* COND_EXEC */
7180                 7 + /* HDP_flush */
7181                 4 + /* VGT_flush */
7182                 14 + /* CE_META */
7183                 31 + /* DE_META */
7184                 3 + /* CNTX_CTRL */
7185                 5 + /* HDP_INVL */
7186                 8 + 8 + /* FENCE x2 */
7187                 2 + /* SWITCH_BUFFER */
7188                 7, /* gfx_v9_0_emit_mem_sync */
7189         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7190         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7191         .emit_fence = gfx_v9_0_ring_emit_fence,
7192         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7193         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7194         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7195         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7196         .test_ring = gfx_v9_0_ring_test_ring,
7197         .test_ib = gfx_v9_0_ring_test_ib,
7198         .insert_nop = amdgpu_sw_ring_insert_nop,
7199         .pad_ib = amdgpu_ring_generic_pad_ib,
7200         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7201         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7202         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7203         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7204         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7205         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7206         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7207         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7208         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7209         .patch_cntl = gfx_v9_0_ring_patch_cntl,
7210         .patch_de = gfx_v9_0_ring_patch_de_meta,
7211         .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7212 };
7213
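/* Ring callbacks for the asynchronous compute (MEC) rings. */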
7214 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7215         .type = AMDGPU_RING_TYPE_COMPUTE,
7216         .align_mask = 0xff,
7217         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7218         .support_64bit_ptrs = true,
7219         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7220         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7221         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7222         .emit_frame_size =
7223                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7224                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7225                 5 + /* hdp invalidate */
7226                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7227                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7228                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7229                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7230                 7 + /* gfx_v9_0_emit_mem_sync */
7231                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7232                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7233         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7234         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7235         .emit_fence = gfx_v9_0_ring_emit_fence,
7236         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7237         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7238         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7239         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7240         .test_ring = gfx_v9_0_ring_test_ring,
7241         .test_ib = gfx_v9_0_ring_test_ib,
7242         .insert_nop = amdgpu_ring_insert_nop,
7243         .pad_ib = amdgpu_ring_generic_pad_ib,
7244         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7245         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7246         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7247         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7248         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7249 };
7250
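/* Ring callbacks for the KIQ (Kernel Interface Queue) ring; unlike the other
 * rings it also provides rreg/wreg emission for CP-mediated register access. */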
7251 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7252         .type = AMDGPU_RING_TYPE_KIQ,
7253         .align_mask = 0xff,
7254         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7255         .support_64bit_ptrs = true,
7256         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7257         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7258         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7259         .emit_frame_size =
7260                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7261                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7262                 5 + /* hdp invalidate */
7263                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7264                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7265                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7266                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7267         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7268         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7269         .test_ring = gfx_v9_0_ring_test_ring,
7270         .insert_nop = amdgpu_ring_insert_nop,
7271         .pad_ib = amdgpu_ring_generic_pad_ib,
7272         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7273         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7274         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7275         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7276 };
7277
7278 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7279 {
7280         int i;
7281
7282         adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7283
7284         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7285                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7286
7287         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7288                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7289                         adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7290         }
7291
7292         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7293                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7294 }
7295
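/* Interrupt source callbacks: end-of-pipe, privileged register/instruction
 * faults and CP ECC errors. */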
7296 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7297         .set = gfx_v9_0_set_eop_interrupt_state,
7298         .process = gfx_v9_0_eop_irq,
7299 };
7300
7301 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7302         .set = gfx_v9_0_set_priv_reg_fault_state,
7303         .process = gfx_v9_0_priv_reg_irq,
7304 };
7305
7306 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7307         .set = gfx_v9_0_set_priv_inst_fault_state,
7308         .process = gfx_v9_0_priv_inst_irq,
7309 };
7310
7311 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7312         .set = gfx_v9_0_set_cp_ecc_error_state,
7313         .process = amdgpu_gfx_cp_ecc_error_irq,
7314 };
7315
7317 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7318 {
7319         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7320         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7321
7322         adev->gfx.priv_reg_irq.num_types = 1;
7323         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7324
7325         adev->gfx.priv_inst_irq.num_types = 1;
7326         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7327
7328         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7329         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7330 }
7331
7332 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7333 {
7334         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7335         case IP_VERSION(9, 0, 1):
7336         case IP_VERSION(9, 2, 1):
7337         case IP_VERSION(9, 4, 0):
7338         case IP_VERSION(9, 2, 2):
7339         case IP_VERSION(9, 1, 0):
7340         case IP_VERSION(9, 4, 1):
7341         case IP_VERSION(9, 3, 0):
7342         case IP_VERSION(9, 4, 2):
7343                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7344                 break;
7345         default:
7346                 break;
7347         }
7348 }
7349
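/* Per-ASIC sizing of the Global Data Share (GDS), Global Wave Sync (GWS)
 * and Ordered Append (OA) resources. */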
7350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7351 {
7352         /* init ASIC GDS info */
7353         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7354         case IP_VERSION(9, 0, 1):
7355         case IP_VERSION(9, 2, 1):
7356         case IP_VERSION(9, 4, 0):
7357                 adev->gds.gds_size = 0x10000;
7358                 break;
7359         case IP_VERSION(9, 2, 2):
7360         case IP_VERSION(9, 1, 0):
7361         case IP_VERSION(9, 4, 1):
7362                 adev->gds.gds_size = 0x1000;
7363                 break;
7364         case IP_VERSION(9, 4, 2):
7365                 /* Aldebaran removed all GDS internal memory; only GWS
7366                  * opcodes such as barrier and semaphore are supported
7367                  * in the kernel. */
7368                 adev->gds.gds_size = 0;
7369                 break;
7370         default:
7371                 adev->gds.gds_size = 0x10000;
7372                 break;
7373         }
7374
7375         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7376         case IP_VERSION(9, 0, 1):
7377         case IP_VERSION(9, 4, 0):
7378                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7379                 break;
7380         case IP_VERSION(9, 2, 1):
7381                 adev->gds.gds_compute_max_wave_id = 0x27f;
7382                 break;
7383         case IP_VERSION(9, 2, 2):
7384         case IP_VERSION(9, 1, 0):
7385                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7386                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7387                 else
7388                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7389                 break;
7390         case IP_VERSION(9, 4, 1):
7391                 adev->gds.gds_compute_max_wave_id = 0xfff;
7392                 break;
7393         case IP_VERSION(9, 4, 2):
7394                 /* deprecated on Aldebaran, not used at all */
7395                 adev->gds.gds_compute_max_wave_id = 0;
7396                 break;
7397         default:
7398                 /* this really depends on the chip */
7399                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7400                 break;
7401         }
7402
7403         adev->gds.gws_size = 64;
7404         adev->gds.oa_size = 16;
7405 }
7406
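/* Program the user-requested inactive-CU mask for the currently selected
 * shader array into GC_USER_SHADER_ARRAY_CONFIG. */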
7407 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7408                                                  u32 bitmap)
7409 {
7410         u32 data;
7411
7412         if (!bitmap)
7413                 return;
7414
7415         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7416         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7417
7418         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7419 }
7420
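/* Combine the hardware and user inactive-CU masks for the selected SE/SH
 * and return the resulting active-CU bitmap. */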
7421 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7422 {
7423         u32 data, mask;
7424
7425         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7426         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7427
7428         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7429         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7430
7431         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7432
7433         return (~data) & mask;
7434 }
7435
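/*
 * Walk every shader engine / shader array, apply any user CU disable masks,
 * and fill @cu_info with the per-SH active-CU bitmaps, the always-on CU mask
 * and the total number of active CUs.
 */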
7436 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7437                                  struct amdgpu_cu_info *cu_info)
7438 {
7439         int i, j, k, counter, active_cu_number = 0;
7440         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7441         unsigned disable_masks[4 * 4];
7442
7443         if (!adev || !cu_info)
7444                 return -EINVAL;
7445
7446         /*
7447          * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7448          */
7449         if (adev->gfx.config.max_shader_engines *
7450                 adev->gfx.config.max_sh_per_se > 16)
7451                 return -EINVAL;
7452
7453         amdgpu_gfx_parse_disable_cu(disable_masks,
7454                                     adev->gfx.config.max_shader_engines,
7455                                     adev->gfx.config.max_sh_per_se);
7456
7457         mutex_lock(&adev->grbm_idx_mutex);
7458         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7459                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7460                         mask = 1;
7461                         ao_bitmap = 0;
7462                         counter = 0;
7463                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7464                         gfx_v9_0_set_user_cu_inactive_bitmap(
7465                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7466                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7467
7468                         /*
7469                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
7470                          * is a 4x4 array, which suits Vega ASICs with their
7471                          * 4*2 SE/SH layout.
7472                          * For Arcturus the SE/SH layout changed to 8*1, so to
7473                          * minimize the impact it is folded into the current
7474                          * bitmap array as below:
7475                          *    SE4,SH0 --> bitmap[0][1]
7476                          *    SE5,SH0 --> bitmap[1][1]
7477                          *    SE6,SH0 --> bitmap[2][1]
7478                          *    SE7,SH0 --> bitmap[3][1]
7479                          */
7480                         cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7481
7482                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7483                                 if (bitmap & mask) {
7484                                         if (counter < adev->gfx.config.max_cu_per_sh)
7485                                                 ao_bitmap |= mask;
7486                                         counter++;
7487                                 }
7488                                 mask <<= 1;
7489                         }
7490                         active_cu_number += counter;
7491                         if (i < 2 && j < 2)
7492                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7493                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7494                 }
7495         }
7496         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7497         mutex_unlock(&adev->grbm_idx_mutex);
7498
7499         cu_info->number = active_cu_number;
7500         cu_info->ao_cu_mask = ao_cu_mask;
7501         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7502
7503         return 0;
7504 }
7505
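/* IP block version descriptor for GFX v9.0, wiring in gfx_v9_0_ip_funcs. */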
7506 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7507 {
7508         .type = AMD_IP_BLOCK_TYPE_GFX,
7509         .major = 9,
7510         .minor = 0,
7511         .rev = 0,
7512         .funcs = &gfx_v9_0_ip_funcs,
7513 };