drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68
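/*
 * Firmware images used by the supported GFX9 ASICs. MODULE_FIRMWARE() only
 * declares them so userspace tooling can find them; the driver requests the
 * appropriate set later during init.
 */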
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134
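/*
 * Register offsets for Arcturus (ARCT) TCP channel steering and Renoir TSC
 * counter registers, defined locally rather than in the shared gc_9_0 headers.
 */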
135 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
137 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
139 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
141 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
143 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
145 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
147
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152
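/*
 * GC status/debug registers captured when the driver dumps GFX v9 IP state,
 * e.g. for a devcoredump after a GPU hang.
 */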
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160         SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164         SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193         SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194         SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195         SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200         SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202         SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205         SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206         SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207         SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210         SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223         SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228         /* cp header registers */
229         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234         /* SE status registers */
235         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240
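/*
 * Per-queue CP HQD registers captured for each compute queue as part of the
 * same GFX v9 state dump.
 */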
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242         /* compute queue registers */
243         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281
282 enum ta_ras_gfx_subblock {
283         /*CPC*/
284         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286         TA_RAS_BLOCK__GFX_CPC_UCODE,
287         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294         /* CPF*/
295         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298         TA_RAS_BLOCK__GFX_CPF_TAG,
299         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300         /* CPG*/
301         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304         TA_RAS_BLOCK__GFX_CPG_TAG,
305         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306         /* GDS*/
307         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314         /* SPI*/
315         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316         /* SQ*/
317         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319         TA_RAS_BLOCK__GFX_SQ_LDS_D,
320         TA_RAS_BLOCK__GFX_SQ_LDS_I,
321         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323         /* SQC (3 ranges)*/
324         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325         /* SQC range 0*/
326         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337         /* SQC range 1*/
338         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351         /* SQC range 2*/
352         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366         /* TA*/
367         TA_RAS_BLOCK__GFX_TA_INDEX_START,
368         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374         /* TCA*/
375         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379         /* TCC (5 sub-ranges)*/
380         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381         /* TCC range 0*/
382         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392         /* TCC range 1*/
393         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398         /* TCC range 2*/
399         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410         /* TCC range 3*/
411         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416         /* TCC range 4*/
417         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424         /* TCI*/
425         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426         /* TCP*/
427         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436         /* TD*/
437         TA_RAS_BLOCK__GFX_TD_INDEX_START,
438         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442         /* EA (3 sub-ranges)*/
443         TA_RAS_BLOCK__GFX_EA_INDEX_START,
444         /* EA range 0*/
445         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455         /* EA range 1*/
456         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465         /* EA range 2*/
466         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473         /* UTC VM L2 bank*/
474         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475         /* UTC VM walker*/
476         TA_RAS_BLOCK__UTC_VML2_WALKER,
477         /* UTC ATC L2 2MB cache*/
478         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479         /* UTC ATC L2 4KB cache*/
480         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481         TA_RAS_BLOCK__GFX_MAX
482 };
483
484 struct ras_gfx_subblock {
485         unsigned char *name;
486         int ta_subblock;
487         int hw_supported_error_type;
488         int sw_supported_error_type;
489 };
490
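/*
 * Build a ras_gfx_subblock entry indexed by its AMDGPU_RAS_BLOCK__* id:
 * flags a-d are packed into hw_supported_error_type and e-h into
 * sw_supported_error_type.
 */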
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493                 #subblock,                                                     \
494                 TA_RAS_BLOCK__##subblock,                                      \
495                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497         }
498
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517                              0),
518         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519                              0),
520         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528                              0, 0),
529         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530                              0),
531         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532                              0, 0),
533         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534                              0),
535         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536                              0, 0),
537         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538                              0),
539         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540                              1),
541         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542                              0, 0, 0),
543         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544                              0),
545         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546                              0),
547         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548                              0),
549         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550                              0),
551         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552                              0),
553         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554                              0, 0),
555         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556                              0),
557         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558                              0),
559         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560                              0, 0, 0),
561         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562                              0),
563         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564                              0),
565         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566                              0),
567         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568                              0),
569         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570                              0),
571         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572                              0, 0),
573         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574                              0),
575         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584                              1),
585         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586                              1),
587         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588                              1),
589         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590                              0),
591         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592                              0),
593         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605                              0),
606         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608                              0),
609         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610                              0, 0),
611         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612                              0),
613         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648
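/*
 * "Golden" register settings: (register, AND mask, OR value) triplets applied
 * read-modify-write during hardware init to put each ASIC into its
 * recommended default state.
 */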
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846
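/*
 * Registers expected to be programmed through the RLCG register-access path
 * rather than by direct MMIO writes (e.g. when running under SR-IOV).
 */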
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848         {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849         {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851
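/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR/_DATA register instances relative
 * to instance 0, so they can be addressed by index.
 */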
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886                                 struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891                                           void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893                                      void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896                                               unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899
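/*
 * Emit a SET_RESOURCES packet on the KIQ ring: tells the CP which compute
 * queues it may use (queue_mask) and where the cleaner shader lives.
 */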
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901                                 uint64_t queue_mask)
902 {
903         struct amdgpu_device *adev = kiq_ring->adev;
904         u64 shader_mc_addr;
905
906         /* Cleaner shader MC address */
907         shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908
909         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910         amdgpu_ring_write(kiq_ring,
911                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
912                 /* vmid_mask:0 queue_type:0 (KIQ) */
913                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914         amdgpu_ring_write(kiq_ring,
915                         lower_32_bits(queue_mask));     /* queue mask lo */
916         amdgpu_ring_write(kiq_ring,
917                         upper_32_bits(queue_mask));     /* queue mask hi */
918         amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919         amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
921         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
922 }
923
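/*
 * Emit a MAP_QUEUES packet on the KIQ ring to map @ring's hardware queue:
 * passes the MQD address, wptr address, doorbell offset and me/pipe/queue
 * selection to the CP.
 */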
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925                                  struct amdgpu_ring *ring)
926 {
927         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928         uint64_t wptr_addr = ring->wptr_gpu_addr;
929         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930
931         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932         /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
933         amdgpu_ring_write(kiq_ring,
934                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939                          /*queue_type: normal compute queue */
940                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941                          /* alloc format: all_on_one_pipe */
942                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944                          /* num_queues: must be 1 */
945                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946         amdgpu_ring_write(kiq_ring,
947                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953
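/*
 * Emit an UNMAP_QUEUES packet on the KIQ ring for @ring. When the action is
 * PREEMPT_QUEUES_NO_UNMAP the ring's current write pointer is included in
 * the packet.
 */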
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955                                    struct amdgpu_ring *ring,
956                                    enum amdgpu_unmap_queues_action action,
957                                    u64 gpu_addr, u64 seq)
958 {
959         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960
961         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963                           PACKET3_UNMAP_QUEUES_ACTION(action) |
964                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967         amdgpu_ring_write(kiq_ring,
968                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969
970         if (action == PREEMPT_QUEUES_NO_UNMAP) {
971                 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972                 amdgpu_ring_write(kiq_ring, 0);
973                 amdgpu_ring_write(kiq_ring, 0);
974
975         } else {
976                 amdgpu_ring_write(kiq_ring, 0);
977                 amdgpu_ring_write(kiq_ring, 0);
978                 amdgpu_ring_write(kiq_ring, 0);
979         }
980 }
981
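/*
 * Query a queue's status via the KIQ: the CP writes @seq to @addr for the
 * queue identified by its doorbell offset, which the caller can poll to
 * confirm the request completed.
 */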
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983                                    struct amdgpu_ring *ring,
984                                    u64 addr,
985                                    u64 seq)
986 {
987         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988
989         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990         amdgpu_ring_write(kiq_ring,
991                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993                           PACKET3_QUERY_STATUS_COMMAND(2));
994         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995         amdgpu_ring_write(kiq_ring,
996                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003
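/*
 * Emit an INVALIDATE_TLBS packet so the CP flushes the VM TLB entries for
 * the given PASID, optionally across all VM hubs.
 */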
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005                                 uint16_t pasid, uint32_t flush_type,
1006                                 bool all_hub)
1007 {
1008         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009         amdgpu_ring_write(kiq_ring,
1010                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015
1016
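/*
 * Reset a hung hardware queue with direct register writes: select the queue
 * via GRBM, request an HQD dequeue and an SPI compute queue reset, then poll
 * CP_HQD_ACTIVE until the queue goes idle.  Only compute queues are handled.
 */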
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018                                         uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019                                         uint32_t xcc_id, uint32_t vmid)
1020 {
1021         struct amdgpu_device *adev = kiq_ring->adev;
1022         unsigned i;
1023
1024         /* enter safe mode */
1025         amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026         mutex_lock(&adev->srbm_mutex);
1027         soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028
1029         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031                 WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032                 /* wait until the dequeue request takes effect */
1033                 for (i = 0; i < adev->usec_timeout; i++) {
1034                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035                                 break;
1036                         udelay(1);
1037                 }
1038                 if (i >= adev->usec_timeout)
1039                         dev_err(adev->dev, "failed to wait for hqd deactivation\n");
1040         } else {
1041                 dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042         }
1043
1044         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045         mutex_unlock(&adev->srbm_mutex);
1046         /* exit safe mode */
1047         amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054         .kiq_query_status = gfx_v9_0_kiq_query_status,
1055         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056         .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057         .set_resources_size = 8,
1058         .map_queues_size = 7,
1059         .unmap_queues_size = 6,
1060         .query_status_size = 7,
1061         .invalidate_tlbs_size = 2,
1062 };
1063
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066         adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068
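/*
 * Program the per-ASIC "golden" register settings.  Most parts also get the
 * common gc_9_x sequence; Arcturus and Aldebaran skip it, and Renoir returns
 * early after its own sequence.
 */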
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072         case IP_VERSION(9, 0, 1):
1073                 soc15_program_register_sequence(adev,
1074                                                 golden_settings_gc_9_0,
1075                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1076                 soc15_program_register_sequence(adev,
1077                                                 golden_settings_gc_9_0_vg10,
1078                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079                 break;
1080         case IP_VERSION(9, 2, 1):
1081                 soc15_program_register_sequence(adev,
1082                                                 golden_settings_gc_9_2_1,
1083                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
1084                 soc15_program_register_sequence(adev,
1085                                                 golden_settings_gc_9_2_1_vg12,
1086                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087                 break;
1088         case IP_VERSION(9, 4, 0):
1089                 soc15_program_register_sequence(adev,
1090                                                 golden_settings_gc_9_0,
1091                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1092                 soc15_program_register_sequence(adev,
1093                                                 golden_settings_gc_9_0_vg20,
1094                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095                 break;
1096         case IP_VERSION(9, 4, 1):
1097                 soc15_program_register_sequence(adev,
1098                                                 golden_settings_gc_9_4_1_arct,
1099                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100                 break;
1101         case IP_VERSION(9, 2, 2):
1102         case IP_VERSION(9, 1, 0):
1103                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104                                                 ARRAY_SIZE(golden_settings_gc_9_1));
1105                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106                         soc15_program_register_sequence(adev,
1107                                                         golden_settings_gc_9_1_rv2,
1108                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109                 else
1110                         soc15_program_register_sequence(adev,
1111                                                         golden_settings_gc_9_1_rv1,
1112                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113                 break;
1114         case IP_VERSION(9, 3, 0):
1115                 soc15_program_register_sequence(adev,
1116                                                 golden_settings_gc_9_1_rn,
1117                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118                 return; /* Renoir does not need the common golden settings */
1119         case IP_VERSION(9, 4, 2):
1120                 gfx_v9_4_2_init_golden_registers(adev,
1121                                                  adev->smuio.funcs->get_die_id(adev));
1122                 break;
1123         default:
1124                 break;
1125         }
1126
1127         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128             (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134                                        bool wc, uint32_t reg, uint32_t val)
1135 {
1136         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138                                 WRITE_DATA_DST_SEL(0) |
1139                                 (wc ? WR_CONFIRM : 0));
1140         amdgpu_ring_write(ring, reg);
1141         amdgpu_ring_write(ring, 0);
1142         amdgpu_ring_write(ring, val);
1143 }
1144
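/*
 * Emit a WAIT_REG_MEM packet: stall until the dword at a register or memory
 * location (selected by @mem_space), masked with @mask, equals @ref,
 * re-polling every @inv interval.
 */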
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146                                   int mem_space, int opt, uint32_t addr0,
1147                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1148                                   uint32_t inv)
1149 {
1150         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151         amdgpu_ring_write(ring,
1152                                  /* memory (1) or register (0) */
1153                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1157
1158         if (mem_space)
1159                 BUG_ON(addr0 & 0x3); /* Dword align */
1160         amdgpu_ring_write(ring, addr0);
1161         amdgpu_ring_write(ring, addr1);
1162         amdgpu_ring_write(ring, ref);
1163         amdgpu_ring_write(ring, mask);
1164         amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166
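/*
 * Basic ring test: write a magic value to SCRATCH_REG0 through the ring and
 * poll the register until the value shows up or the timeout expires.
 */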
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169         struct amdgpu_device *adev = ring->adev;
1170         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171         uint32_t tmp = 0;
1172         unsigned i;
1173         int r;
1174
1175         WREG32(scratch, 0xCAFEDEAD);
1176         r = amdgpu_ring_alloc(ring, 3);
1177         if (r)
1178                 return r;
1179
1180         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181         amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182         amdgpu_ring_write(ring, 0xDEADBEEF);
1183         amdgpu_ring_commit(ring);
1184
1185         for (i = 0; i < adev->usec_timeout; i++) {
1186                 tmp = RREG32(scratch);
1187                 if (tmp == 0xDEADBEEF)
1188                         break;
1189                 udelay(1);
1190         }
1191
1192         if (i >= adev->usec_timeout)
1193                 r = -ETIMEDOUT;
1194         return r;
1195 }
1196
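/*
 * IB test: submit a small indirect buffer that writes a magic value to a
 * writeback slot, wait on the fence and check that the value arrived.
 */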
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199         struct amdgpu_device *adev = ring->adev;
1200         struct amdgpu_ib ib;
1201         struct dma_fence *f = NULL;
1202
1203         unsigned index;
1204         uint64_t gpu_addr;
1205         uint32_t tmp;
1206         long r;
1207
1208         r = amdgpu_device_wb_get(adev, &index);
1209         if (r)
1210                 return r;
1211
1212         gpu_addr = adev->wb.gpu_addr + (index * 4);
1213         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214         memset(&ib, 0, sizeof(ib));
1215
1216         r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217         if (r)
1218                 goto err1;
1219
1220         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222         ib.ptr[2] = lower_32_bits(gpu_addr);
1223         ib.ptr[3] = upper_32_bits(gpu_addr);
1224         ib.ptr[4] = 0xDEADBEEF;
1225         ib.length_dw = 5;
1226
1227         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228         if (r)
1229                 goto err2;
1230
1231         r = dma_fence_wait_timeout(f, false, timeout);
1232         if (r == 0) {
1233                 r = -ETIMEDOUT;
1234                 goto err2;
1235         } else if (r < 0) {
1236                 goto err2;
1237         }
1238
1239         tmp = adev->wb.wb[index];
1240         if (tmp == 0xDEADBEEF)
1241                 r = 0;
1242         else
1243                 r = -EINVAL;
1244
1245 err2:
1246         amdgpu_ib_free(adev, &ib, NULL);
1247         dma_fence_put(f);
1248 err1:
1249         amdgpu_device_wb_free(adev, index);
1250         return r;
1251 }
1252
1253
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256         amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257         amdgpu_ucode_release(&adev->gfx.me_fw);
1258         amdgpu_ucode_release(&adev->gfx.ce_fw);
1259         amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260         amdgpu_ucode_release(&adev->gfx.mec_fw);
1261         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262
1263         kfree(adev->gfx.rlc.register_list_format);
1264 }
1265
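/*
 * Determine from the ME/MEC firmware and feature versions whether the
 * firmware supports write-then-wait register operations, and warn once when
 * the CP firmware is too old.
 */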
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268         adev->gfx.me_fw_write_wait = false;
1269         adev->gfx.mec_fw_write_wait = false;
1270
1271         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273              (adev->gfx.mec_feature_version < 46) ||
1274              (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275              (adev->gfx.pfp_feature_version < 46)))
1276                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1277
1278         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279         case IP_VERSION(9, 0, 1):
1280                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281                     (adev->gfx.me_feature_version >= 42) &&
1282                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283                     (adev->gfx.pfp_feature_version >= 42))
1284                         adev->gfx.me_fw_write_wait = true;
1285
1286                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287                     (adev->gfx.mec_feature_version >= 42))
1288                         adev->gfx.mec_fw_write_wait = true;
1289                 break;
1290         case IP_VERSION(9, 2, 1):
1291                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292                     (adev->gfx.me_feature_version >= 44) &&
1293                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294                     (adev->gfx.pfp_feature_version >= 44))
1295                         adev->gfx.me_fw_write_wait = true;
1296
1297                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298                     (adev->gfx.mec_feature_version >= 44))
1299                         adev->gfx.mec_fw_write_wait = true;
1300                 break;
1301         case IP_VERSION(9, 4, 0):
1302                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303                     (adev->gfx.me_feature_version >= 44) &&
1304                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305                     (adev->gfx.pfp_feature_version >= 44))
1306                         adev->gfx.me_fw_write_wait = true;
1307
1308                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309                     (adev->gfx.mec_feature_version >= 44))
1310                         adev->gfx.mec_fw_write_wait = true;
1311                 break;
1312         case IP_VERSION(9, 1, 0):
1313         case IP_VERSION(9, 2, 2):
1314                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315                     (adev->gfx.me_feature_version >= 42) &&
1316                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317                     (adev->gfx.pfp_feature_version >= 42))
1318                         adev->gfx.me_fw_write_wait = true;
1319
1320                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321                     (adev->gfx.mec_feature_version >= 42))
1322                         adev->gfx.mec_fw_write_wait = true;
1323                 break;
1324         default:
1325                 adev->gfx.me_fw_write_wait = true;
1326                 adev->gfx.mec_fw_write_wait = true;
1327                 break;
1328         }
1329 }
1330
1331 struct amdgpu_gfxoff_quirk {
1332         u16 chip_vendor;
1333         u16 chip_device;
1334         u16 subsys_vendor;
1335         u16 subsys_device;
1336         u8 revision;
1337 };
1338
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346         /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347         { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348         /* https://bbs.openkylin.top/t/topic/171497 */
1349         { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350         /* HP 705G4 DM with R5 2400G */
1351         { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352         { 0, 0, 0, 0, 0 },
1353 };
1354
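/*
 * Return true when the PCI IDs and revision of @pdev match an entry in the
 * quirk table above, i.e. a board where GFXOFF is known to be broken.
 */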
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358
1359         while (p && p->chip_device != 0) {
1360                 if (pdev->vendor == p->chip_vendor &&
1361                     pdev->device == p->chip_device &&
1362                     pdev->subsystem_vendor == p->subsys_vendor &&
1363                     pdev->subsystem_device == p->subsys_device &&
1364                     pdev->revision == p->revision) {
1365                         return true;
1366                 }
1367                 ++p;
1368         }
1369         return false;
1370 }
1371
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374         if (adev->pm.fw_version >= 0x41e2b)
1375                 return true;
1376         else
1377                 return false;
1378 }
1379
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383             (adev->gfx.me_fw_version >= 0x000000a5) &&
1384             (adev->gfx.me_feature_version >= 52))
1385                 return true;
1386         else
1387                 return false;
1388 }
1389
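/*
 * Disable GFXOFF on quirked boards and on Raven parts with RLC firmware that
 * is too old; where GFXOFF remains enabled on APUs, also set the related GFX
 * powergating flags.
 */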
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394
1395         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396         case IP_VERSION(9, 0, 1):
1397         case IP_VERSION(9, 2, 1):
1398         case IP_VERSION(9, 4, 0):
1399                 break;
1400         case IP_VERSION(9, 2, 2):
1401         case IP_VERSION(9, 1, 0):
1402                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404                     ((!is_raven_kicker(adev) &&
1405                       adev->gfx.rlc_fw_version < 531) ||
1406                      (adev->gfx.rlc_feature_version < 1) ||
1407                      !adev->gfx.rlc.is_rlc_v2_1))
1408                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409
1410                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412                                 AMD_PG_SUPPORT_CP |
1413                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1414                 break;
1415         case IP_VERSION(9, 3, 0):
1416                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418                                 AMD_PG_SUPPORT_CP |
1419                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1420                 break;
1421         default:
1422                 break;
1423         }
1424 }
1425
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427                                           char *chip_name)
1428 {
1429         int err;
1430
1431         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432                                    "amdgpu/%s_pfp.bin", chip_name);
1433         if (err)
1434                 goto out;
1435         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1436
1437         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1438                                    "amdgpu/%s_me.bin", chip_name);
1439         if (err)
1440                 goto out;
1441         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1442
1443         err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1444                                    "amdgpu/%s_ce.bin", chip_name);
1445         if (err)
1446                 goto out;
1447         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1448
1449 out:
1450         if (err) {
1451                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1452                 amdgpu_ucode_release(&adev->gfx.me_fw);
1453                 amdgpu_ucode_release(&adev->gfx.ce_fw);
1454         }
1455         return err;
1456 }
1457
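/*
 * Request the RLC firmware, selecting the AM4 or "kicker" variant where the
 * board requires it, then hand the header to the common RLC microcode
 * parser.
 */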
1458 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1459                                        char *chip_name)
1460 {
1461         int err;
1462         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1463         uint16_t version_major;
1464         uint16_t version_minor;
1465         uint32_t smu_version;
1466
1467         /*
1468          * For Picasso on AM4 socket boards, use picasso_rlc_am4.bin
1469          * instead of picasso_rlc.bin.
1470          * Detection:
1471          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1472          *          or revision >= 0xD8 && revision <= 0xDF
1473          * anything else is PCO FP5
1474          */
1475         if (!strcmp(chip_name, "picasso") &&
1476                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1477                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1478                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1479                                            "amdgpu/%s_rlc_am4.bin", chip_name);
1480         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1481                 (smu_version >= 0x41e2b))
1482                 /*
1483                  * The SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1484                  */
1485                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1486                                            "amdgpu/%s_kicker_rlc.bin", chip_name);
1487         else
1488                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1489                                            "amdgpu/%s_rlc.bin", chip_name);
1490         if (err)
1491                 goto out;
1492
1493         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1494         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1495         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1496         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1497 out:
1498         if (err)
1499                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1500
1501         return err;
1502 }
1503
1504 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1505 {
1506         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1507             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1508             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1509                 return false;
1510
1511         return true;
1512 }
1513
1514 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1515                                               char *chip_name)
1516 {
1517         int err;
1518
1519         if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1520                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1521                                            "amdgpu/%s_sjt_mec.bin", chip_name);
1522         else
1523                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1524                                            "amdgpu/%s_mec.bin", chip_name);
1525         if (err)
1526                 goto out;
1527
1528         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1529         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1530
1531         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1532                 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1533                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1534                                                    "amdgpu/%s_sjt_mec2.bin", chip_name);
1535                 else
1536                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1537                                                    "amdgpu/%s_mec2.bin", chip_name);
1538                 if (!err) {
1539                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1540                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1541                 } else {
1542                         err = 0;
1543                         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1544                 }
1545         } else {
1546                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1547                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1548         }
1549
1550         gfx_v9_0_check_if_need_gfxoff(adev);
1551         gfx_v9_0_check_fw_write_wait(adev);
1552
1553 out:
1554         if (err)
1555                 amdgpu_ucode_release(&adev->gfx.mec_fw);
1556         return err;
1557 }
1558
1559 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1560 {
1561         char ucode_prefix[30];
1562         int r;
1563
1564         DRM_DEBUG("\n");
1565         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1566
1567         /* No CPG in Arcturus */
1568         if (adev->gfx.num_gfx_rings) {
1569                 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1570                 if (r)
1571                         return r;
1572         }
1573
1574         r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1575         if (r)
1576                 return r;
1577
1578         r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1579         if (r)
1580                 return r;
1581
1582         return r;
1583 }
1584
1585 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1586 {
1587         u32 count = 0;
1588         const struct cs_section_def *sect = NULL;
1589         const struct cs_extent_def *ext = NULL;
1590
1591         /* begin clear state */
1592         count += 2;
1593         /* context control state */
1594         count += 3;
1595
1596         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1597                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1598                         if (sect->id == SECT_CONTEXT)
1599                                 count += 2 + ext->reg_count;
1600                         else
1601                                 return 0;
1602                 }
1603         }
1604
1605         /* end clear state */
1606         count += 2;
1607         /* clear state */
1608         count += 2;
1609
1610         return count;
1611 }
1612
1613 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1614                                     volatile u32 *buffer)
1615 {
1616         u32 count = 0, i;
1617         const struct cs_section_def *sect = NULL;
1618         const struct cs_extent_def *ext = NULL;
1619
1620         if (adev->gfx.rlc.cs_data == NULL)
1621                 return;
1622         if (buffer == NULL)
1623                 return;
1624
1625         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1626         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1627
1628         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1629         buffer[count++] = cpu_to_le32(0x80000000);
1630         buffer[count++] = cpu_to_le32(0x80000000);
1631
1632         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1633                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1634                         if (sect->id == SECT_CONTEXT) {
1635                                 buffer[count++] =
1636                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1637                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1638                                                 PACKET3_SET_CONTEXT_REG_START);
1639                                 for (i = 0; i < ext->reg_count; i++)
1640                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1641                         } else {
1642                                 return;
1643                         }
1644                 }
1645         }
1646
1647         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1648         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1649
1650         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1651         buffer[count++] = cpu_to_le32(0);
1652 }
1653
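/*
 * Build the per-SE/SH mask of compute units that stay powered for RLC load
 * balancing, and program the first two of them as the always-on set for
 * powergating.
 */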
1654 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1655 {
1656         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1657         uint32_t pg_always_on_cu_num = 2;
1658         uint32_t always_on_cu_num;
1659         uint32_t i, j, k;
1660         uint32_t mask, cu_bitmap, counter;
1661
1662         if (adev->flags & AMD_IS_APU)
1663                 always_on_cu_num = 4;
1664         else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1665                 always_on_cu_num = 8;
1666         else
1667                 always_on_cu_num = 12;
1668
1669         mutex_lock(&adev->grbm_idx_mutex);
1670         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1671                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1672                         mask = 1;
1673                         cu_bitmap = 0;
1674                         counter = 0;
1675                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1676
1677                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1678                                 if (cu_info->bitmap[0][i][j] & mask) {
1679                                         if (counter == pg_always_on_cu_num)
1680                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1681                                         if (counter < always_on_cu_num)
1682                                                 cu_bitmap |= mask;
1683                                         else
1684                                                 break;
1685                                         counter++;
1686                                 }
1687                                 mask <<= 1;
1688                         }
1689
1690                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1691                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1692                 }
1693         }
1694         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1695         mutex_unlock(&adev->grbm_idx_mutex);
1696 }
1697
1698 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1699 {
1700         uint32_t data;
1701
1702         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1703         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1704         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1705         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1706         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1707
1708         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1709         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1710
1711         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1712         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1713
1714         mutex_lock(&adev->grbm_idx_mutex);
1715         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1716         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1717         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1718
1719         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1720         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1721         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1722         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1723         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1724
1725         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1726         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1727         data &= 0x0000FFFF;
1728         data |= 0x00C00000;
1729         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1730
1731         /*
1732          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1733          * programmed in gfx_v9_0_init_always_on_cu_mask()
1734          */
1735
1736         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1737          * but used for RLC_LB_CNTL configuration */
1738         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1739         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1740         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1741         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1742         mutex_unlock(&adev->grbm_idx_mutex);
1743
1744         gfx_v9_0_init_always_on_cu_mask(adev);
1745 }
1746
1747 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1748 {
1749         uint32_t data;
1750
1751         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1752         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1753         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1754         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1755         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1756
1757         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1758         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1759
1760         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1761         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1762
1763         mutex_lock(&adev->grbm_idx_mutex);
1764         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1765         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1766         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1767
1768         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1769         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1770         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1771         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1772         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1773
1774         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1775         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1776         data &= 0x0000FFFF;
1777         data |= 0x00C00000;
1778         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1779
1780         /*
1781          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1782          * programmed in gfx_v9_0_init_always_on_cu_mask()
1783          */
1784
1785         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1786          * but used for RLC_LB_CNTL configuration */
1787         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1788         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1789         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1790         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1791         mutex_unlock(&adev->grbm_idx_mutex);
1792
1793         gfx_v9_0_init_always_on_cu_mask(adev);
1794 }
1795
1796 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1797 {
1798         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1799 }
1800
1801 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1802 {
1803         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1804                 return 5;
1805         else
1806                 return 4;
1807 }
1808
1809 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1810 {
1811         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1812
1813         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1814         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1815         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1816         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1817         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1818         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1819         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1820         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1821         adev->gfx.rlc.rlcg_reg_access_supported = true;
1822 }
1823
1824 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1825 {
1826         const struct cs_section_def *cs_data;
1827         int r;
1828
1829         adev->gfx.rlc.cs_data = gfx9_cs_data;
1830
1831         cs_data = adev->gfx.rlc.cs_data;
1832
1833         if (cs_data) {
1834                 /* init clear state block */
1835                 r = amdgpu_gfx_rlc_init_csb(adev);
1836                 if (r)
1837                         return r;
1838         }
1839
1840         if (adev->flags & AMD_IS_APU) {
1841                 /* TODO: double check the cp_table_size for RV */
1842                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1843                 r = amdgpu_gfx_rlc_init_cpt(adev);
1844                 if (r)
1845                         return r;
1846         }
1847
1848         return 0;
1849 }
1850
1851 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1852 {
1853         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1854         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1855 }
1856
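/*
 * Allocate the MEC resources: the per-queue HPD/EOP buffer (zeroed) and a
 * GTT buffer object holding a copy of the MEC firmware image.
 */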
1857 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1858 {
1859         int r;
1860         u32 *hpd;
1861         const __le32 *fw_data;
1862         unsigned fw_size;
1863         u32 *fw;
1864         size_t mec_hpd_size;
1865
1866         const struct gfx_firmware_header_v1_0 *mec_hdr;
1867
1868         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1869
1870         /* take ownership of the relevant compute queues */
1871         amdgpu_gfx_compute_queue_acquire(adev);
1872         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1873         if (mec_hpd_size) {
1874                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1875                                               AMDGPU_GEM_DOMAIN_VRAM |
1876                                               AMDGPU_GEM_DOMAIN_GTT,
1877                                               &adev->gfx.mec.hpd_eop_obj,
1878                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1879                                               (void **)&hpd);
1880                 if (r) {
1881                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1882                         gfx_v9_0_mec_fini(adev);
1883                         return r;
1884                 }
1885
1886                 memset(hpd, 0, mec_hpd_size);
1887
1888                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890         }
1891
1892         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1893
1894         fw_data = (const __le32 *)
1895                 (adev->gfx.mec_fw->data +
1896                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1897         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1898
1899         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1900                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1901                                       &adev->gfx.mec.mec_fw_obj,
1902                                       &adev->gfx.mec.mec_fw_gpu_addr,
1903                                       (void **)&fw);
1904         if (r) {
1905                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1906                 gfx_v9_0_mec_fini(adev);
1907                 return r;
1908         }
1909
1910         memcpy(fw, fw_data, fw_size);
1911
1912         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1913         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1914
1915         return 0;
1916 }
1917
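/*
 * Read one dword from a wave's indexed register space (SQ_IND_*) for the
 * given SIMD and wave slot; used by the wave debug helpers below.
 */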
1918 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1919 {
1920         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1921                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1922                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1923                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1924                 (SQ_IND_INDEX__FORCE_READ_MASK));
1925         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1926 }
1927
1928 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1929                            uint32_t wave, uint32_t thread,
1930                            uint32_t regno, uint32_t num, uint32_t *out)
1931 {
1932         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1933                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1934                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1935                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1936                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1937                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1938                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1939         while (num--)
1940                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1941 }
1942
1943 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1944 {
1945         /* type 1 wave data */
1946         dst[(*no_fields)++] = 1;
1947         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1948         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1949         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1950         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1951         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1952         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1953         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1954         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1955         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1956         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1960         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1961         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1962 }
1963
1964 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1965                                      uint32_t wave, uint32_t start,
1966                                      uint32_t size, uint32_t *dst)
1967 {
1968         wave_read_regs(
1969                 adev, simd, wave, 0,
1970                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1971 }
1972
1973 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1974                                      uint32_t wave, uint32_t thread,
1975                                      uint32_t start, uint32_t size,
1976                                      uint32_t *dst)
1977 {
1978         wave_read_regs(
1979                 adev, simd, wave, thread,
1980                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1981 }
1982
1983 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1984                                   u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1985 {
1986         soc15_grbm_select(adev, me, pipe, q, vm, 0);
1987 }
1988
1989 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1990         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1991         .select_se_sh = &gfx_v9_0_select_se_sh,
1992         .read_wave_data = &gfx_v9_0_read_wave_data,
1993         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1994         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1995         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1996 };
1997
1998 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1999                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2000                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003
2004 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2005         .ras_block = {
2006                 .hw_ops = &gfx_v9_0_ras_ops,
2007         },
2008 };
2009
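/*
 * Per-ASIC early configuration: choose FIFO sizes, the RAS block and the
 * GB_ADDR_CONFIG value, then decode its fields (pipes, banks, shader
 * engines, ...) into adev->gfx.config.
 */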
2010 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2011 {
2012         u32 gb_addr_config;
2013         int err;
2014
2015         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2016         case IP_VERSION(9, 0, 1):
2017                 adev->gfx.config.max_hw_contexts = 8;
2018                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2019                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2020                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2021                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2022                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2023                 break;
2024         case IP_VERSION(9, 2, 1):
2025                 adev->gfx.config.max_hw_contexts = 8;
2026                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2031                 DRM_INFO("fix gfx.config for vega12\n");
2032                 break;
2033         case IP_VERSION(9, 4, 0):
2034                 adev->gfx.ras = &gfx_v9_0_ras;
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041                 gb_addr_config &= ~0xf3e777ff;
2042                 gb_addr_config |= 0x22014042;
2043                 /* check vbios table if gpu info is not available */
2044                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2045                 if (err)
2046                         return err;
2047                 break;
2048         case IP_VERSION(9, 2, 2):
2049         case IP_VERSION(9, 1, 0):
2050                 adev->gfx.config.max_hw_contexts = 8;
2051                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2052                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2053                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2054                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2055                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2056                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2057                 else
2058                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2059                 break;
2060         case IP_VERSION(9, 4, 1):
2061                 adev->gfx.ras = &gfx_v9_4_ras;
2062                 adev->gfx.config.max_hw_contexts = 8;
2063                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2064                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2065                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2066                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2067                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2068                 gb_addr_config &= ~0xf3e777ff;
2069                 gb_addr_config |= 0x22014042;
2070                 break;
2071         case IP_VERSION(9, 3, 0):
2072                 adev->gfx.config.max_hw_contexts = 8;
2073                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2076                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078                 gb_addr_config &= ~0xf3e777ff;
2079                 gb_addr_config |= 0x22010042;
2080                 break;
2081         case IP_VERSION(9, 4, 2):
2082                 adev->gfx.ras = &gfx_v9_4_2_ras;
2083                 adev->gfx.config.max_hw_contexts = 8;
2084                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2085                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2086                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2087                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2088                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2089                 gb_addr_config &= ~0xf3e777ff;
2090                 gb_addr_config |= 0x22014042;
2091                 /* check vbios table if gpu info is not available */
2092                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2093                 if (err)
2094                         return err;
2095                 break;
2096         default:
2097                 BUG();
2098                 break;
2099         }
2100
2101         adev->gfx.config.gb_addr_config = gb_addr_config;
2102
2103         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2104                         REG_GET_FIELD(
2105                                         adev->gfx.config.gb_addr_config,
2106                                         GB_ADDR_CONFIG,
2107                                         NUM_PIPES);
2108
2109         adev->gfx.config.max_tile_pipes =
2110                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2111
2112         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2113                         REG_GET_FIELD(
2114                                         adev->gfx.config.gb_addr_config,
2115                                         GB_ADDR_CONFIG,
2116                                         NUM_BANKS);
2117         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2118                         REG_GET_FIELD(
2119                                         adev->gfx.config.gb_addr_config,
2120                                         GB_ADDR_CONFIG,
2121                                         MAX_COMPRESSED_FRAGS);
2122         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2123                         REG_GET_FIELD(
2124                                         adev->gfx.config.gb_addr_config,
2125                                         GB_ADDR_CONFIG,
2126                                         NUM_RB_PER_SE);
2127         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2128                         REG_GET_FIELD(
2129                                         adev->gfx.config.gb_addr_config,
2130                                         GB_ADDR_CONFIG,
2131                                         NUM_SHADER_ENGINES);
2132         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2133                         REG_GET_FIELD(
2134                                         adev->gfx.config.gb_addr_config,
2135                                         GB_ADDR_CONFIG,
2136                                         PIPE_INTERLEAVE_SIZE));
2137
2138         return 0;
2139 }
2140
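/*
 * Initialize one software compute ring: map it to a MEC/pipe/queue, assign
 * its doorbell and slice of the HPD/EOP buffer, pick the hardware priority
 * and hook it up to the EOP interrupt source.
 */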
2141 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2142                                       int mec, int pipe, int queue)
2143 {
2144         unsigned irq_type;
2145         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2146         unsigned int hw_prio;
2147
2150         /* mec0 is me1 */
2151         ring->me = mec + 1;
2152         ring->pipe = pipe;
2153         ring->queue = queue;
2154
2155         ring->ring_obj = NULL;
2156         ring->use_doorbell = true;
2157         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2158         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2159                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2160         ring->vm_hub = AMDGPU_GFXHUB(0);
2161         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162
2163         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165                 + ring->pipe;
2166         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2167                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2168         /* type-2 packets are deprecated on MEC, use type-3 instead */
2169         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2170                                 hw_prio, NULL);
2171 }
2172
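/*
 * Allocate the buffers used to snapshot the core GFX registers and the
 * per-compute-queue registers for IP dumps; if an allocation fails, that
 * part of the dump is simply skipped.
 */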
2173 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2174 {
2175         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2176         uint32_t *ptr;
2177         uint32_t inst;
2178
2179         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2180         if (!ptr) {
2181                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2182                 adev->gfx.ip_dump_core = NULL;
2183         } else {
2184                 adev->gfx.ip_dump_core = ptr;
2185         }
2186
2187         /* Allocate memory for compute queue registers for all the instances */
2188         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2189         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2190                 adev->gfx.mec.num_queue_per_pipe;
2191
2192         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2193         if (!ptr) {
2194                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2195                 adev->gfx.ip_dump_compute_queues = NULL;
2196         } else {
2197                 adev->gfx.ip_dump_compute_queues = ptr;
2198         }
2199 }
2200
2201 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2202 {
2203         int i, j, k, r, ring_id;
2204         int xcc_id = 0;
2205         struct amdgpu_ring *ring;
2206         struct amdgpu_device *adev = ip_block->adev;
2207         unsigned int hw_prio;
2208
2209         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2210         case IP_VERSION(9, 0, 1):
2211         case IP_VERSION(9, 2, 1):
2212         case IP_VERSION(9, 4, 0):
2213         case IP_VERSION(9, 2, 2):
2214         case IP_VERSION(9, 1, 0):
2215         case IP_VERSION(9, 4, 1):
2216         case IP_VERSION(9, 3, 0):
2217         case IP_VERSION(9, 4, 2):
2218                 adev->gfx.mec.num_mec = 2;
2219                 break;
2220         default:
2221                 adev->gfx.mec.num_mec = 1;
2222                 break;
2223         }
2224
2225         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2226         case IP_VERSION(9, 4, 2):
2227                 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2228                 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2229                 if (adev->gfx.mec_fw_version >= 88) {
2230                         adev->gfx.enable_cleaner_shader = true;
2231                         r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2232                         if (r) {
2233                                 adev->gfx.enable_cleaner_shader = false;
2234                                 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2235                         }
2236                 }
2237                 break;
2238         default:
2239                 adev->gfx.enable_cleaner_shader = false;
2240                 break;
2241         }
2242
2243         adev->gfx.mec.num_pipe_per_mec = 4;
2244         adev->gfx.mec.num_queue_per_pipe = 8;
2245
2246         /* EOP Event */
2247         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2248         if (r)
2249                 return r;
2250
2251         /* Bad opcode Event */
2252         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2253                               GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2254                               &adev->gfx.bad_op_irq);
2255         if (r)
2256                 return r;
2257
2258         /* Privileged reg */
2259         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2260                               &adev->gfx.priv_reg_irq);
2261         if (r)
2262                 return r;
2263
2264         /* Privileged inst */
2265         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2266                               &adev->gfx.priv_inst_irq);
2267         if (r)
2268                 return r;
2269
2270         /* ECC error */
2271         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2272                               &adev->gfx.cp_ecc_error_irq);
2273         if (r)
2274                 return r;
2275
2276         /* FUE error */
2277         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2278                               &adev->gfx.cp_ecc_error_irq);
2279         if (r)
2280                 return r;
2281
2282         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2283
2284         if (adev->gfx.rlc.funcs) {
2285                 if (adev->gfx.rlc.funcs->init) {
2286                         r = adev->gfx.rlc.funcs->init(adev);
2287                         if (r) {
2288                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2289                                 return r;
2290                         }
2291                 }
2292         }
2293
2294         r = gfx_v9_0_mec_init(adev);
2295         if (r) {
2296                 DRM_ERROR("Failed to init MEC BOs!\n");
2297                 return r;
2298         }
2299
2300         /* set up the gfx ring */
2301         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2302                 ring = &adev->gfx.gfx_ring[i];
2303                 ring->ring_obj = NULL;
2304                 if (!i)
2305                         sprintf(ring->name, "gfx");
2306                 else
2307                         sprintf(ring->name, "gfx_%d", i);
2308                 ring->use_doorbell = true;
2309                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2310
2311                 /* disable scheduler on the real ring */
2312                 ring->no_scheduler = adev->gfx.mcbp;
2313                 ring->vm_hub = AMDGPU_GFXHUB(0);
2314                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2315                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2316                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2317                 if (r)
2318                         return r;
2319         }
2320
2321         /* set up the software rings */
2322         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2323                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2324                         ring = &adev->gfx.sw_gfx_ring[i];
2325                         ring->ring_obj = NULL;
2326                         sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2327                         ring->use_doorbell = true;
2328                         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2329                         ring->is_sw_ring = true;
2330                         hw_prio = amdgpu_sw_ring_priority(i);
2331                         ring->vm_hub = AMDGPU_GFXHUB(0);
2332                         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2333                                              AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2334                                              NULL);
2335                         if (r)
2336                                 return r;
2337                         ring->wptr = 0;
2338                 }
2339
2340                 /* init the muxer and add software rings */
2341                 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2342                                          GFX9_NUM_SW_GFX_RINGS);
2343                 if (r) {
2344                         DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2345                         return r;
2346                 }
2347                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2348                         r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2349                                                         &adev->gfx.sw_gfx_ring[i]);
2350                         if (r) {
2351                                 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2352                                 return r;
2353                         }
2354                 }
2355         }
2356
2357         /* set up the compute queues - allocate horizontally across pipes */
2358         ring_id = 0;
2359         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2360                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2361                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2362                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2363                                                                      k, j))
2364                                         continue;
2365
2366                                 r = gfx_v9_0_compute_ring_init(adev,
2367                                                                ring_id,
2368                                                                i, k, j);
2369                                 if (r)
2370                                         return r;
2371
2372                                 ring_id++;
2373                         }
2374                 }
2375         }
2376
2377         /* TODO: Add queue reset mask when FW fully supports it */
2378         adev->gfx.gfx_supported_reset =
2379                 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2380         adev->gfx.compute_supported_reset =
2381                 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2382
2383         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2384         if (r) {
2385                 DRM_ERROR("Failed to init KIQ BOs!\n");
2386                 return r;
2387         }
2388
2389         r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2390         if (r)
2391                 return r;
2392
2393         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2394         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2395         if (r)
2396                 return r;
2397
2398         adev->gfx.ce_ram_size = 0x8000;
2399
2400         r = gfx_v9_0_gpu_early_init(adev);
2401         if (r)
2402                 return r;
2403
2404         if (amdgpu_gfx_ras_sw_init(adev)) {
2405                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2406                 return -EINVAL;
2407         }
2408
2409         gfx_v9_0_alloc_ip_dump(adev);
2410
2411         r = amdgpu_gfx_sysfs_init(adev);
2412         if (r)
2413                 return r;
2414
2415         return 0;
2416 }
2417
2418
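/*
 * Software fini for the GFX v9 IP block: tears down everything created in
 * gfx_v9_0_sw_init() - rings, ring mux, MQDs, KIQ, cleaner shader, MEC and
 * RLC BOs, microcode, sysfs entries and the IP-dump buffers.
 */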
2419 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2420 {
2421         int i;
2422         struct amdgpu_device *adev = ip_block->adev;
2423
2424         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2425                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2426                         amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2427                 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2428         }
2429
2430         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2431                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2432         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2433                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2434
2435         amdgpu_gfx_mqd_sw_fini(adev, 0);
2436         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2437         amdgpu_gfx_kiq_fini(adev, 0);
2438
2439         amdgpu_gfx_cleaner_shader_sw_fini(adev);
2440
2441         gfx_v9_0_mec_fini(adev);
2442         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2443                                 &adev->gfx.rlc.clear_state_gpu_addr,
2444                                 (void **)&adev->gfx.rlc.cs_ptr);
2445         if (adev->flags & AMD_IS_APU) {
2446                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2447                                 &adev->gfx.rlc.cp_table_gpu_addr,
2448                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2449         }
2450         gfx_v9_0_free_microcode(adev);
2451
2452         amdgpu_gfx_sysfs_fini(adev);
2453
2454         kfree(adev->gfx.ip_dump_core);
2455         kfree(adev->gfx.ip_dump_compute_queues);
2456
2457         return 0;
2458 }
2459
2460
2461 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2462 {
2463         /* TODO */
2464 }
2465
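/*
 * Program GRBM_GFX_INDEX to target a specific shader engine, shader array
 * and instance; a value of 0xffffffff for any argument selects broadcast
 * writes for that field.
 */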
2466 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2467                            u32 instance, int xcc_id)
2468 {
2469         u32 data;
2470
2471         if (instance == 0xffffffff)
2472                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2473         else
2474                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2475
2476         if (se_num == 0xffffffff)
2477                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2478         else
2479                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2480
2481         if (sh_num == 0xffffffff)
2482                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2483         else
2484                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2485
2486         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2487 }
2488
2489 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2490 {
2491         u32 data, mask;
2492
2493         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2494         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2495
2496         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2497         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2498
2499         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2500                                          adev->gfx.config.max_sh_per_se);
2501
2502         return (~data) & mask;
2503 }
2504
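/*
 * Walk every SE/SH, collect the active render backend bitmap for each and
 * store the combined mask and RB count in the gfx config.
 */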
2505 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2506 {
2507         int i, j;
2508         u32 data;
2509         u32 active_rbs = 0;
2510         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2511                                         adev->gfx.config.max_sh_per_se;
2512
2513         mutex_lock(&adev->grbm_idx_mutex);
2514         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2515                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2516                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2517                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2518                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2519                                                rb_bitmap_width_per_sh);
2520                 }
2521         }
2522         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2523         mutex_unlock(&adev->grbm_idx_mutex);
2524
2525         adev->gfx.config.backend_enable_mask = active_rbs;
2526         adev->gfx.config.num_rbs = hweight32(active_rbs);
2527 }
2528
2529 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2530                                 uint32_t first_vmid,
2531                                 uint32_t last_vmid)
2532 {
2533         uint32_t data;
2534         uint32_t trap_config_vmid_mask = 0;
2535         int i;
2536
2537         /* Calculate trap config vmid mask */
2538         for (i = first_vmid; i < last_vmid; i++)
2539                 trap_config_vmid_mask |= (1 << i);
2540
2541         data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2542                         VMID_SEL, trap_config_vmid_mask);
2543         data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2544                         TRAP_EN, 1);
2545         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2546         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2547
2548         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2549         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2550 }
2551
2552 #define DEFAULT_SH_MEM_BASES    (0x6000)
2553 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2554 {
2555         int i;
2556         uint32_t sh_mem_config;
2557         uint32_t sh_mem_bases;
2558
2559         /*
2560          * Configure apertures:
2561          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2562          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2563          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2564          */
2565         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2566
2567         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2568                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2569                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2570
2571         mutex_lock(&adev->srbm_mutex);
2572         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2573                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2574                 /* CP and shaders */
2575                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2576                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2577         }
2578         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2579         mutex_unlock(&adev->srbm_mutex);
2580
2581         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2582          * access. These should be enabled by FW for target VMIDs. */
2583         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2584                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2585                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2586                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2587                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2588         }
2589 }
2590
2591 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2592 {
2593         int vmid;
2594
2595         /*
2596          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2597          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2598          * the driver can enable them for graphics. VMID0 should maintain
2599          * access so that HWS firmware can save/restore entries.
2600          */
2601         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2602                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2603                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2604                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2605                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2606         }
2607 }
2608
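/*
 * On GC 9.4.1, set SQ_CONFIG.DISABLE_BARRIER_WAITCNT based on whether
 * barriers carry an automatic waitcnt (adev->barrier_has_auto_waitcnt).
 */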
2609 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2610 {
2611         uint32_t tmp;
2612
2613         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2614         case IP_VERSION(9, 4, 1):
2615                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2616                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2617                                 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2618                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2619                 break;
2620         default:
2621                 break;
2622         }
2623 }
2624
2625 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2626 {
2627         u32 tmp;
2628         int i;
2629
2630         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2631
2632         gfx_v9_0_tiling_mode_table_init(adev);
2633
2634         if (adev->gfx.num_gfx_rings)
2635                 gfx_v9_0_setup_rb(adev);
2636         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2637         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2638
2639         /* XXX SH_MEM regs */
2640         /* where to put LDS, scratch, GPUVM in FSA64 space */
2641         mutex_lock(&adev->srbm_mutex);
2642         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2643                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2644                 /* CP and shaders */
2645                 if (i == 0) {
2646                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2647                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2648                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2649                                             !!adev->gmc.noretry);
2650                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2651                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2652                 } else {
2653                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2654                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2655                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2656                                             !!adev->gmc.noretry);
2657                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2658                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2659                                 (adev->gmc.private_aperture_start >> 48));
2660                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2661                                 (adev->gmc.shared_aperture_start >> 48));
2662                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2663                 }
2664         }
2665         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2666
2667         mutex_unlock(&adev->srbm_mutex);
2668
2669         gfx_v9_0_init_compute_vmid(adev);
2670         gfx_v9_0_init_gds_vmid(adev);
2671         gfx_v9_0_init_sq_config(adev);
2672 }
2673
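/*
 * Poll the RLC serdes busy registers for every SE/SH (and then the non-CU
 * masters) until they report idle or the usec timeout expires.
 */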
2674 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2675 {
2676         u32 i, j, k;
2677         u32 mask;
2678
2679         mutex_lock(&adev->grbm_idx_mutex);
2680         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2681                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2682                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2683                         for (k = 0; k < adev->usec_timeout; k++) {
2684                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2685                                         break;
2686                                 udelay(1);
2687                         }
2688                         if (k == adev->usec_timeout) {
2689                                 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2690                                                       0xffffffff, 0xffffffff, 0);
2691                                 mutex_unlock(&adev->grbm_idx_mutex);
2692                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2693                                          i, j);
2694                                 return;
2695                         }
2696                 }
2697         }
2698         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2699         mutex_unlock(&adev->grbm_idx_mutex);
2700
2701         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2702                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2703                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2704                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2705         for (k = 0; k < adev->usec_timeout; k++) {
2706                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2707                         break;
2708                 udelay(1);
2709         }
2710 }
2711
2712 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2713                                                bool enable)
2714 {
2715         u32 tmp;
2716
2717         /* These interrupts should be enabled to drive DS clock */
2718
2719         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2720
2721         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2722         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2723         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2724         if (adev->gfx.num_gfx_rings)
2725                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2726
2727         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2728 }
2729
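/*
 * Fill the clear-state buffer and program its GPU address and size into
 * the RLC CSIB registers.
 */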
2730 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2731 {
2732         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2733         /* csib */
2734         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2735                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2736         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2737                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2738         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2739                         adev->gfx.rlc.clear_state_size);
2740 }
2741
2742 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2743                                 int indirect_offset,
2744                                 int list_size,
2745                                 int *unique_indirect_regs,
2746                                 int unique_indirect_reg_count,
2747                                 int *indirect_start_offsets,
2748                                 int *indirect_start_offsets_count,
2749                                 int max_start_offsets_count)
2750 {
2751         int idx;
2752
2753         for (; indirect_offset < list_size; indirect_offset++) {
2754                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2755                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2756                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2757
2758                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2759                         indirect_offset += 2;
2760
2761                         /* look for the matching index */
2762                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2763                                 if (unique_indirect_regs[idx] ==
2764                                         register_list_format[indirect_offset] ||
2765                                         !unique_indirect_regs[idx])
2766                                         break;
2767                         }
2768
2769                         BUG_ON(idx >= unique_indirect_reg_count);
2770
2771                         if (!unique_indirect_regs[idx])
2772                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2773
2774                         indirect_offset++;
2775                 }
2776         }
2777 }
2778
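/*
 * Parse the RLC register list from the firmware and program the RLC
 * save/restore list: the direct register restore table goes into ARAM,
 * the direct/indirect register list and starting offsets go into the GPM
 * scratch RAM, and the unique indirect registers are loaded into the SRM
 * index control registers.
 */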
2779 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2780 {
2781         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2782         int unique_indirect_reg_count = 0;
2783
2784         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2785         int indirect_start_offsets_count = 0;
2786
2787         int list_size = 0;
2788         int i = 0, j = 0;
2789         u32 tmp = 0;
2790
2791         u32 *register_list_format =
2792                 kmemdup(adev->gfx.rlc.register_list_format,
2793                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2794         if (!register_list_format)
2795                 return -ENOMEM;
2796
2797         /* setup unique_indirect_regs array and indirect_start_offsets array */
2798         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2799         gfx_v9_1_parse_ind_reg_list(register_list_format,
2800                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2801                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2802                                     unique_indirect_regs,
2803                                     unique_indirect_reg_count,
2804                                     indirect_start_offsets,
2805                                     &indirect_start_offsets_count,
2806                                     ARRAY_SIZE(indirect_start_offsets));
2807
2808         /* enable auto inc in case it is disabled */
2809         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2810         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2811         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2812
2813         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2814         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2815                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2816         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2817                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2818                         adev->gfx.rlc.register_restore[i]);
2819
2820         /* load indirect register */
2821         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2822                 adev->gfx.rlc.reg_list_format_start);
2823
2824         /* direct register portion */
2825         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2826                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2827                         register_list_format[i]);
2828
2829         /* indirect register portion */
2830         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2831                 if (register_list_format[i] == 0xFFFFFFFF) {
2832                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2833                         continue;
2834                 }
2835
2836                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2837                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2838
2839                 for (j = 0; j < unique_indirect_reg_count; j++) {
2840                         if (register_list_format[i] == unique_indirect_regs[j]) {
2841                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2842                                 break;
2843                         }
2844                 }
2845
2846                 BUG_ON(j >= unique_indirect_reg_count);
2847
2848                 i++;
2849         }
2850
2851         /* set save/restore list size */
2852         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2853         list_size = list_size >> 1;
2854         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2855                 adev->gfx.rlc.reg_restore_list_size);
2856         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2857
2858         /* write the starting offsets to RLC scratch ram */
2859         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2860                 adev->gfx.rlc.starting_offsets_start);
2861         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2862                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2863                        indirect_start_offsets[i]);
2864
2865         /* load unique indirect regs */
2866         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2867                 if (unique_indirect_regs[i] != 0) {
2868                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2869                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2870                                unique_indirect_regs[i] & 0x3FFFF);
2871
2872                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2873                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2874                                unique_indirect_regs[i] >> 20);
2875                 }
2876         }
2877
2878         kfree(register_list_format);
2879         return 0;
2880 }
2881
2882 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2883 {
2884         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2885 }
2886
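/*
 * Give the GFX IP (RLC) control over coarse grain power gating, or hand it
 * back, by toggling PWR_GFX_RLC_CGPG_EN in PWR_MISC_CNTL_STATUS.
 */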
2887 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2888                                              bool enable)
2889 {
2890         uint32_t data = 0;
2891         uint32_t default_data = 0;
2892
2893         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2894         if (enable) {
2895                 /* enable GFXIP control over CGPG */
2896                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2897                 if (default_data != data)
2898                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2899
2900                 /* update status */
2901                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2902                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2903                 if (default_data != data)
2904                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2905         } else {
2906                 /* restore GFXIP control over CGPG */
2907                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2908                 if (default_data != data)
2909                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2910         }
2911 }
2912
2913 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2914 {
2915         uint32_t data = 0;
2916
2917         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2918                               AMD_PG_SUPPORT_GFX_SMG |
2919                               AMD_PG_SUPPORT_GFX_DMG)) {
2920                 /* init IDLE_POLL_COUNT = 60 */
2921                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2922                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2923                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2924                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2925
2926                 /* init RLC PG Delay */
2927                 data = 0;
2928                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2929                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2930                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2931                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2932                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2933
2934                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2935                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2936                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2937                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2938
2939                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2940                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2941                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2942                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2943
2944                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2945                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2946
2947                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2948                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2949                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2950                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2951                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2952         }
2953 }
2954
2955 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2956                                                 bool enable)
2957 {
2958         uint32_t data = 0;
2959         uint32_t default_data = 0;
2960
2961         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2962         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2963                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2964                              enable ? 1 : 0);
2965         if (default_data != data)
2966                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2967 }
2968
2969 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2970                                                 bool enable)
2971 {
2972         uint32_t data = 0;
2973         uint32_t default_data = 0;
2974
2975         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2976         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2977                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2978                              enable ? 1 : 0);
2979         if (default_data != data)
2980                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2981 }
2982
2983 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2984                                         bool enable)
2985 {
2986         uint32_t data = 0;
2987         uint32_t default_data = 0;
2988
2989         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2990         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2991                              CP_PG_DISABLE,
2992                              enable ? 0 : 1);
2993         if (default_data != data)
2994                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2995 }
2996
2997 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2998                                                 bool enable)
2999 {
3000         uint32_t data, default_data;
3001
3002         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3003         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3004                              GFX_POWER_GATING_ENABLE,
3005                              enable ? 1 : 0);
3006         if (default_data != data)
3007                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3008 }
3009
3010 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3011                                                 bool enable)
3012 {
3013         uint32_t data, default_data;
3014
3015         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3016         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3017                              GFX_PIPELINE_PG_ENABLE,
3018                              enable ? 1 : 0);
3019         if (default_data != data)
3020                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3021
3022         if (!enable)
3023                 /* read any GFX register to wake up GFX */
3024                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3025 }
3026
3027 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3028                                                        bool enable)
3029 {
3030         uint32_t data, default_data;
3031
3032         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3033         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3034                              STATIC_PER_CU_PG_ENABLE,
3035                              enable ? 1 : 0);
3036         if (default_data != data)
3037                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3038 }
3039
3040 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3041                                                 bool enable)
3042 {
3043         uint32_t data, default_data;
3044
3045         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3046         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3047                              DYN_PER_CU_PG_ENABLE,
3048                              enable ? 1 : 0);
3049         if (default_data != data)
3050                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3051 }
3052
3053 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3054 {
3055         gfx_v9_0_init_csb(adev);
3056
3057         /*
3058          * The RLC save/restore list has been usable since RLC v2_1,
3059          * and it is needed by the gfxoff feature.
3060          */
3061         if (adev->gfx.rlc.is_rlc_v2_1) {
3062                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3063                             IP_VERSION(9, 2, 1) ||
3064                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3065                         gfx_v9_1_init_rlc_save_restore_list(adev);
3066                 gfx_v9_0_enable_save_restore_machine(adev);
3067         }
3068
3069         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3070                               AMD_PG_SUPPORT_GFX_SMG |
3071                               AMD_PG_SUPPORT_GFX_DMG |
3072                               AMD_PG_SUPPORT_CP |
3073                               AMD_PG_SUPPORT_GDS |
3074                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3075                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3076                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3077                 gfx_v9_0_init_gfx_power_gating(adev);
3078         }
3079 }
3080
3081 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3082 {
3083         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3084         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3085         gfx_v9_0_wait_for_rlc_serdes(adev);
3086 }
3087
3088 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3089 {
3090         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3091         udelay(50);
3092         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3093         udelay(50);
3094 }
3095
3096 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3097 {
3098 #ifdef AMDGPU_RLC_DEBUG_RETRY
3099         u32 rlc_ucode_ver;
3100 #endif
3101
3102         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3103         udelay(50);
3104
3105         /* carrizo enables the cp interrupt only after cp is initialized */
3106         if (!(adev->flags & AMD_IS_APU)) {
3107                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3108                 udelay(50);
3109         }
3110
3111 #ifdef AMDGPU_RLC_DEBUG_RETRY
3112         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3113         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3114         if (rlc_ucode_ver == 0x108) {
3115                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3116                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3117                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3118                  * default is 0x9C4 to create a 100us interval */
3119                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3120                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3121                  * to disable the page fault retry interrupts, default is
3122                  * 0x100 (256) */
3123                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3124         }
3125 #endif
3126 }
3127
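/*
 * Legacy (non-PSP) RLC microcode load: write the ucode words into the
 * RLC_GPM_UCODE_DATA register and finish by writing the firmware version
 * to RLC_GPM_UCODE_ADDR.
 */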
3128 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3129 {
3130         const struct rlc_firmware_header_v2_0 *hdr;
3131         const __le32 *fw_data;
3132         unsigned i, fw_size;
3133
3134         if (!adev->gfx.rlc_fw)
3135                 return -EINVAL;
3136
3137         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3138         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3139
3140         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3141                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3142         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3143
3144         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3145                         RLCG_UCODE_LOADING_START_ADDRESS);
3146         for (i = 0; i < fw_size; i++)
3147                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3148         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3149
3150         return 0;
3151 }
3152
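/*
 * Bring the RLC back up: stop it, disable CGCG, re-init power gating and
 * the CSB, load the RLC microcode if PSP loading is not used, configure
 * LBPW where applicable, then start the RLC again.  Under SR-IOV only the
 * CSB is re-initialized.
 */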
3153 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3154 {
3155         int r;
3156
3157         if (amdgpu_sriov_vf(adev)) {
3158                 gfx_v9_0_init_csb(adev);
3159                 return 0;
3160         }
3161
3162         adev->gfx.rlc.funcs->stop(adev);
3163
3164         /* disable CG */
3165         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3166
3167         gfx_v9_0_init_pg(adev);
3168
3169         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3170                 /* legacy rlc firmware loading */
3171                 r = gfx_v9_0_rlc_load_microcode(adev);
3172                 if (r)
3173                         return r;
3174         }
3175
3176         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3177         case IP_VERSION(9, 2, 2):
3178         case IP_VERSION(9, 1, 0):
3179                 gfx_v9_0_init_lbpw(adev);
3180                 if (amdgpu_lbpw == 0)
3181                         gfx_v9_0_enable_lbpw(adev, false);
3182                 else
3183                         gfx_v9_0_enable_lbpw(adev, true);
3184                 break;
3185         case IP_VERSION(9, 4, 0):
3186                 gfx_v9_4_init_lbpw(adev);
3187                 if (amdgpu_lbpw > 0)
3188                         gfx_v9_0_enable_lbpw(adev, true);
3189                 else
3190                         gfx_v9_0_enable_lbpw(adev, false);
3191                 break;
3192         default:
3193                 break;
3194         }
3195
3196         gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3197
3198         adev->gfx.rlc.funcs->start(adev);
3199
3200         return 0;
3201 }
3202
3203 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3204 {
3205         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3206
3207         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3208         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3209         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3210         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3211         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3212         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3213         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3214         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3215         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3216         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3217         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3218         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3219         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3220         udelay(50);
3221 }
3222
3223 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3224 {
3225         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3226         const struct gfx_firmware_header_v1_0 *ce_hdr;
3227         const struct gfx_firmware_header_v1_0 *me_hdr;
3228         const __le32 *fw_data;
3229         unsigned i, fw_size;
3230
3231         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3232                 return -EINVAL;
3233
3234         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3235                 adev->gfx.pfp_fw->data;
3236         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3237                 adev->gfx.ce_fw->data;
3238         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3239                 adev->gfx.me_fw->data;
3240
3241         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3242         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3243         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3244
3245         gfx_v9_0_cp_gfx_enable(adev, false);
3246
3247         /* PFP */
3248         fw_data = (const __le32 *)
3249                 (adev->gfx.pfp_fw->data +
3250                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3251         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3252         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3253         for (i = 0; i < fw_size; i++)
3254                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3255         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3256
3257         /* CE */
3258         fw_data = (const __le32 *)
3259                 (adev->gfx.ce_fw->data +
3260                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3261         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3262         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3263         for (i = 0; i < fw_size; i++)
3264                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3265         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3266
3267         /* ME */
3268         fw_data = (const __le32 *)
3269                 (adev->gfx.me_fw->data +
3270                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3271         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3272         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3273         for (i = 0; i < fw_size; i++)
3274                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3275         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3276
3277         return 0;
3278 }
3279
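/*
 * Initialize the CP and submit the clear-state (CSB) packets on the gfx
 * ring.  On gfx9 APUs resuming from S3 without firmware-assisted resume,
 * the CSB packet resubmit is skipped.
 */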
3280 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3281 {
3282         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3283         const struct cs_section_def *sect = NULL;
3284         const struct cs_extent_def *ext = NULL;
3285         int r, i, tmp;
3286
3287         /* init the CP */
3288         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3289         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3290
3291         gfx_v9_0_cp_gfx_enable(adev, true);
3292
3293         /* For now, limit this quirk to the gfx9 APU series; it has already
3294          * been confirmed that the gfx10/gfx11 APUs do not need such an update.
3295          */
3296         if (adev->flags & AMD_IS_APU &&
3297                         adev->in_s3 && !pm_resume_via_firmware()) {
3298                 DRM_INFO("Will skip the CSB packet resubmit\n");
3299                 return 0;
3300         }
3301         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3302         if (r) {
3303                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3304                 return r;
3305         }
3306
3307         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3308         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3309
3310         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3311         amdgpu_ring_write(ring, 0x80000000);
3312         amdgpu_ring_write(ring, 0x80000000);
3313
3314         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3315                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3316                         if (sect->id == SECT_CONTEXT) {
3317                                 amdgpu_ring_write(ring,
3318                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3319                                                ext->reg_count));
3320                                 amdgpu_ring_write(ring,
3321                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3322                                 for (i = 0; i < ext->reg_count; i++)
3323                                         amdgpu_ring_write(ring, ext->extent[i]);
3324                         }
3325                 }
3326         }
3327
3328         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3329         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3330
3331         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3332         amdgpu_ring_write(ring, 0);
3333
3334         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3335         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3336         amdgpu_ring_write(ring, 0x8000);
3337         amdgpu_ring_write(ring, 0x8000);
3338
3339         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3340         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3341                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3342         amdgpu_ring_write(ring, tmp);
3343         amdgpu_ring_write(ring, 0);
3344
3345         amdgpu_ring_commit(ring);
3346
3347         return 0;
3348 }
3349
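/*
 * Resume the gfx ring: program the ring buffer size, read/write pointers,
 * writeback addresses, base address and doorbell, then kick off
 * gfx_v9_0_cp_gfx_start().
 */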
3350 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3351 {
3352         struct amdgpu_ring *ring;
3353         u32 tmp;
3354         u32 rb_bufsz;
3355         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3356
3357         /* Set the write pointer delay */
3358         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3359
3360         /* set the RB to use vmid 0 */
3361         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3362
3363         /* Set ring buffer size */
3364         ring = &adev->gfx.gfx_ring[0];
3365         rb_bufsz = order_base_2(ring->ring_size / 8);
3366         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3367         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3368 #ifdef __BIG_ENDIAN
3369         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3370 #endif
3371         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3372
3373         /* Initialize the ring buffer's write pointers */
3374         ring->wptr = 0;
3375         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3376         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3377
3378         /* set the wb address whether it's enabled or not */
3379         rptr_addr = ring->rptr_gpu_addr;
3380         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3381         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3382
3383         wptr_gpu_addr = ring->wptr_gpu_addr;
3384         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3385         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3386
3387         mdelay(1);
3388         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3389
3390         rb_addr = ring->gpu_addr >> 8;
3391         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3392         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3393
3394         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3395         if (ring->use_doorbell) {
3396                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3397                                     DOORBELL_OFFSET, ring->doorbell_index);
3398                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3399                                     DOORBELL_EN, 1);
3400         } else {
3401                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3402         }
3403         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3404
3405         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3406                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3407         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3408
3409         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3410                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3411
3412
3413         /* start the ring */
3414         gfx_v9_0_cp_gfx_start(adev);
3415
3416         return 0;
3417 }
3418
3419 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3420 {
3421         if (enable) {
3422                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3423         } else {
3424                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3425                                  (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3426                                   CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3427                                   CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3428                                   CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3429                                   CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3430                                   CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3431                                   CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3432                                   CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3433                                   CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3434                 adev->gfx.kiq[0].ring.sched.ready = false;
3435         }
3436         udelay(50);
3437 }
3438
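/*
 * Legacy (non-PSP) MEC microcode load: halt the compute CP, program the
 * instruction cache base to the MEC firmware BO and write the MEC1 jump
 * table through the CP_MEC_ME1_UCODE registers.
 */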
3439 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3440 {
3441         const struct gfx_firmware_header_v1_0 *mec_hdr;
3442         const __le32 *fw_data;
3443         unsigned i;
3444         u32 tmp;
3445
3446         if (!adev->gfx.mec_fw)
3447                 return -EINVAL;
3448
3449         gfx_v9_0_cp_compute_enable(adev, false);
3450
3451         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3452         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3453
3454         fw_data = (const __le32 *)
3455                 (adev->gfx.mec_fw->data +
3456                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3457         tmp = 0;
3458         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3459         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3460         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3461
3462         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3463                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3464         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3465                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3466
3467         /* MEC1 */
3468         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3469                          mec_hdr->jt_offset);
3470         for (i = 0; i < mec_hdr->jt_size; i++)
3471                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3472                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3473
3474         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3475                         adev->gfx.mec_fw_version);
3476         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3477
3478         return 0;
3479 }
3480
3481 /* KIQ functions */
3482 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3483 {
3484         uint32_t tmp;
3485         struct amdgpu_device *adev = ring->adev;
3486
3487         /* tell the RLC which queue is the KIQ queue */
3488         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3489         tmp &= 0xffffff00;
3490         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3491         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3492         tmp |= 0x80;
3493         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3494 }
3495
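/* Raise the pipe/queue priority fields in the MQD for compute queues that
 * the driver marks as high priority.
 */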
3496 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3497 {
3498         struct amdgpu_device *adev = ring->adev;
3499
3500         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3501                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3502                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3503                         mqd->cp_hqd_queue_priority =
3504                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3505                 }
3506         }
3507 }
3508
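/* Fill the memory queue descriptor (MQD) with the initial HQD state for this
 * ring: EOP buffer, doorbell, ring base/size, rptr/wptr addresses and
 * priority.  The CP consumes this structure when the queue is mapped.
 */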
3509 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3510 {
3511         struct amdgpu_device *adev = ring->adev;
3512         struct v9_mqd *mqd = ring->mqd_ptr;
3513         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3514         uint32_t tmp;
3515
3516         mqd->header = 0xC0310800;
3517         mqd->compute_pipelinestat_enable = 0x00000001;
3518         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3519         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3520         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3521         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3522         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3523         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3524         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3525         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3526         mqd->compute_misc_reserved = 0x00000003;
3527
3528         mqd->dynamic_cu_mask_addr_lo =
3529                 lower_32_bits(ring->mqd_gpu_addr
3530                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3531         mqd->dynamic_cu_mask_addr_hi =
3532                 upper_32_bits(ring->mqd_gpu_addr
3533                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3534
3535         eop_base_addr = ring->eop_gpu_addr >> 8;
3536         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3537         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3538
3539         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3540         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3541         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3542                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3543
3544         mqd->cp_hqd_eop_control = tmp;
3545
3546         /* enable doorbell? */
3547         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3548
3549         if (ring->use_doorbell) {
3550                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3551                                     DOORBELL_OFFSET, ring->doorbell_index);
3552                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3553                                     DOORBELL_EN, 1);
3554                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3555                                     DOORBELL_SOURCE, 0);
3556                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3557                                     DOORBELL_HIT, 0);
3558         } else {
3559                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3560                                          DOORBELL_EN, 0);
3561         }
3562
3563         mqd->cp_hqd_pq_doorbell_control = tmp;
3564
3565         /* disable the queue if it's active */
3566         ring->wptr = 0;
3567         mqd->cp_hqd_dequeue_request = 0;
3568         mqd->cp_hqd_pq_rptr = 0;
3569         mqd->cp_hqd_pq_wptr_lo = 0;
3570         mqd->cp_hqd_pq_wptr_hi = 0;
3571
3572         /* set the pointer to the MQD */
3573         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3574         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3575
3576         /* set MQD vmid to 0 */
3577         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3578         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3579         mqd->cp_mqd_control = tmp;
3580
3581         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3582         hqd_gpu_addr = ring->gpu_addr >> 8;
3583         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3584         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3585
3586         /* set up the HQD, this is similar to CP_RB0_CNTL */
3587         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3588         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3589                             (order_base_2(ring->ring_size / 4) - 1));
3590         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3591                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3592 #ifdef __BIG_ENDIAN
3593         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3594 #endif
3595         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3596         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3597         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3598         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3599         mqd->cp_hqd_pq_control = tmp;
3600
3601         /* set the wb address whether it's enabled or not */
3602         wb_gpu_addr = ring->rptr_gpu_addr;
3603         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3604         mqd->cp_hqd_pq_rptr_report_addr_hi =
3605                 upper_32_bits(wb_gpu_addr) & 0xffff;
3606
3607         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3608         wb_gpu_addr = ring->wptr_gpu_addr;
3609         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3610         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3611
3612         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3613         ring->wptr = 0;
3614         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3615
3616         /* set the vmid for the queue */
3617         mqd->cp_hqd_vmid = 0;
3618
3619         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3620         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3621         mqd->cp_hqd_persistent_state = tmp;
3622
3623         /* set MIN_IB_AVAIL_SIZE */
3624         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3625         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3626         mqd->cp_hqd_ib_control = tmp;
3627
3628         /* set static priority for a queue/ring */
3629         gfx_v9_0_mqd_set_priority(ring, mqd);
3630         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3631
3632         /* the map_queues packet doesn't need to activate the queue,
3633          * so only the KIQ needs to set this field.
3634          */
3635         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3636                 mqd->cp_hqd_active = 1;
3637
3638         return 0;
3639 }
3640
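/* Program the HQD registers directly from the MQD; used to bring up the KIQ,
 * which cannot be mapped through map_queues packets like the other queues.
 */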
3641 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3642 {
3643         struct amdgpu_device *adev = ring->adev;
3644         struct v9_mqd *mqd = ring->mqd_ptr;
3645         int j;
3646
3647         /* disable wptr polling */
3648         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3649
3650         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3651                mqd->cp_hqd_eop_base_addr_lo);
3652         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3653                mqd->cp_hqd_eop_base_addr_hi);
3654
3655         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3656         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3657                mqd->cp_hqd_eop_control);
3658
3659         /* enable doorbell? */
3660         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3661                mqd->cp_hqd_pq_doorbell_control);
3662
3663         /* disable the queue if it's active */
3664         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3665                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3666                 for (j = 0; j < adev->usec_timeout; j++) {
3667                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3668                                 break;
3669                         udelay(1);
3670                 }
3671                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3672                        mqd->cp_hqd_dequeue_request);
3673                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3674                        mqd->cp_hqd_pq_rptr);
3675                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3676                        mqd->cp_hqd_pq_wptr_lo);
3677                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3678                        mqd->cp_hqd_pq_wptr_hi);
3679         }
3680
3681         /* set the pointer to the MQD */
3682         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3683                mqd->cp_mqd_base_addr_lo);
3684         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3685                mqd->cp_mqd_base_addr_hi);
3686
3687         /* set MQD vmid to 0 */
3688         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3689                mqd->cp_mqd_control);
3690
3691         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3692         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3693                mqd->cp_hqd_pq_base_lo);
3694         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3695                mqd->cp_hqd_pq_base_hi);
3696
3697         /* set up the HQD, this is similar to CP_RB0_CNTL */
3698         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3699                mqd->cp_hqd_pq_control);
3700
3701         /* set the wb address whether it's enabled or not */
3702         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3703                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3704         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3705                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3706
3707         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3708         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3709                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3710         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3711                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3712
3713         /* enable the doorbell if requested */
3714         if (ring->use_doorbell) {
3715                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3716                                         (adev->doorbell_index.kiq * 2) << 2);
3717                 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3718                  * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3719                  * work around this issue, and this change has to align with the
3720                  * firmware update.
3721                  */
3722                 if (check_if_enlarge_doorbell_range(adev))
3723                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3724                                         (adev->doorbell.size - 4));
3725                 else
3726                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3727                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3728         }
3729
3730         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3731                mqd->cp_hqd_pq_doorbell_control);
3732
3733         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3734         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3735                mqd->cp_hqd_pq_wptr_lo);
3736         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3737                mqd->cp_hqd_pq_wptr_hi);
3738
3739         /* set the vmid for the queue */
3740         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3741
3742         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3743                mqd->cp_hqd_persistent_state);
3744
3745         /* activate the queue */
3746         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3747                mqd->cp_hqd_active);
3748
3749         if (ring->use_doorbell)
3750                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3751
3752         return 0;
3753 }
3754
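/* Deactivate the KIQ's HQD: request a dequeue, wait for the queue to go idle
 * and then clear the HQD registers.
 */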
3755 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3756 {
3757         struct amdgpu_device *adev = ring->adev;
3758         int j;
3759
3760         /* disable the queue if it's active */
3761         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3762
3763                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3764
3765                 for (j = 0; j < adev->usec_timeout; j++) {
3766                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3767                                 break;
3768                         udelay(1);
3769                 }
3770
3771                 if (j == adev->usec_timeout) {
3772                         DRM_DEBUG("KIQ dequeue request failed.\n");
3773
3774                         /* Manual disable if dequeue request times out */
3775                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3776                 }
3777
3778                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3779                       0);
3780         }
3781
3782         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3783         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3784         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3785         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3786         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3787         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3788         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3789         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3790
3791         return 0;
3792 }
3793
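/* Initialize the KIQ's MQD, or restore it from the backup copy after a GPU
 * reset, and program the HQD registers under the SRBM mutex.
 */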
3794 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3795 {
3796         struct amdgpu_device *adev = ring->adev;
3797         struct v9_mqd *mqd = ring->mqd_ptr;
3798         struct v9_mqd *tmp_mqd;
3799
3800         gfx_v9_0_kiq_setting(ring);
3801
3802         /* The GPU could be in a bad state during probe and the driver triggers
3803          * the reset after loading the SMU; in this case the MQD has not been
3804          * initialized and the driver needs to re-init it.
3805          * Check mqd->cp_hqd_pq_control since this value should not be 0.
3806          */
3807         tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3808         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3809                 /* for the GPU_RESET case, reset the MQD to a clean status */
3810                 if (adev->gfx.kiq[0].mqd_backup)
3811                         memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3812
3813                 /* reset ring buffer */
3814                 ring->wptr = 0;
3815                 amdgpu_ring_clear_ring(ring);
3816
3817                 mutex_lock(&adev->srbm_mutex);
3818                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3819                 gfx_v9_0_kiq_init_register(ring);
3820                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3821                 mutex_unlock(&adev->srbm_mutex);
3822         } else {
3823                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3824                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3825                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3826                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3827                         amdgpu_ring_clear_ring(ring);
3828                 mutex_lock(&adev->srbm_mutex);
3829                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3830                 gfx_v9_0_mqd_init(ring);
3831                 gfx_v9_0_kiq_init_register(ring);
3832                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3833                 mutex_unlock(&adev->srbm_mutex);
3834
3835                 if (adev->gfx.kiq[0].mqd_backup)
3836                         memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3837         }
3838
3839         return 0;
3840 }
3841
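/* Initialize a compute ring's MQD, or restore it from the backup copy on
 * reset/resume; the queue itself is mapped onto hardware later via the KIQ.
 */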
3842 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3843 {
3844         struct amdgpu_device *adev = ring->adev;
3845         struct v9_mqd *mqd = ring->mqd_ptr;
3846         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3847         struct v9_mqd *tmp_mqd;
3848
3849         /* Same as the KIQ init above: the driver needs to re-init the MQD if
3850          * mqd->cp_hqd_pq_control has not been initialized before.
3851          */
3852         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3853
3854         if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3855             (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3856                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3857                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3858                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3859                 mutex_lock(&adev->srbm_mutex);
3860                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3861                 gfx_v9_0_mqd_init(ring);
3862                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3863                 mutex_unlock(&adev->srbm_mutex);
3864
3865                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3866                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3867         } else {
3868                 /* restore MQD to a clean status */
3869                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3870                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3871                 /* reset ring buffer */
3872                 ring->wptr = 0;
3873                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3874                 amdgpu_ring_clear_ring(ring);
3875         }
3876
3877         return 0;
3878 }
3879
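/* Map the KIQ's MQD buffer and initialize the KIQ so it can service
 * map/unmap requests for the other queues.
 */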
3880 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3881 {
3882         struct amdgpu_ring *ring;
3883         int r;
3884
3885         ring = &adev->gfx.kiq[0].ring;
3886
3887         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3888         if (unlikely(r != 0))
3889                 return r;
3890
3891         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3892         if (unlikely(r != 0)) {
3893                 amdgpu_bo_unreserve(ring->mqd_obj);
3894                 return r;
3895         }
3896
3897         gfx_v9_0_kiq_init_queue(ring);
3898         amdgpu_bo_kunmap(ring->mqd_obj);
3899         ring->mqd_ptr = NULL;
3900         amdgpu_bo_unreserve(ring->mqd_obj);
3901         return 0;
3902 }
3903
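/* Enable the MEC, initialize the MQD of every compute ring and map the
 * queues onto hardware through the KIQ.
 */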
3904 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3905 {
3906         struct amdgpu_ring *ring = NULL;
3907         int r = 0, i;
3908
3909         gfx_v9_0_cp_compute_enable(adev, true);
3910
3911         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3912                 ring = &adev->gfx.compute_ring[i];
3913
3914                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3915                 if (unlikely(r != 0))
3916                         goto done;
3917                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3918                 if (!r) {
3919                         r = gfx_v9_0_kcq_init_queue(ring, false);
3920                         amdgpu_bo_kunmap(ring->mqd_obj);
3921                         ring->mqd_ptr = NULL;
3922                 }
3923                 amdgpu_bo_unreserve(ring->mqd_obj);
3924                 if (r)
3925                         goto done;
3926         }
3927
3928         r = amdgpu_gfx_enable_kcq(adev, 0);
3929 done:
3930         return r;
3931 }
3932
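/* Full CP bring-up: load microcode when not using PSP, bring up the KIQ,
 * the GFX ring and the compute queues, then ring-test everything.
 */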
3933 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3934 {
3935         int r, i;
3936         struct amdgpu_ring *ring;
3937
3938         if (!(adev->flags & AMD_IS_APU))
3939                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3940
3941         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3942                 if (adev->gfx.num_gfx_rings) {
3943                         /* legacy firmware loading */
3944                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3945                         if (r)
3946                                 return r;
3947                 }
3948
3949                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3950                 if (r)
3951                         return r;
3952         }
3953
3954         if (adev->gfx.num_gfx_rings)
3955                 gfx_v9_0_cp_gfx_enable(adev, false);
3956         gfx_v9_0_cp_compute_enable(adev, false);
3957
3958         r = gfx_v9_0_kiq_resume(adev);
3959         if (r)
3960                 return r;
3961
3962         if (adev->gfx.num_gfx_rings) {
3963                 r = gfx_v9_0_cp_gfx_resume(adev);
3964                 if (r)
3965                         return r;
3966         }
3967
3968         r = gfx_v9_0_kcq_resume(adev);
3969         if (r)
3970                 return r;
3971
3972         if (adev->gfx.num_gfx_rings) {
3973                 ring = &adev->gfx.gfx_ring[0];
3974                 r = amdgpu_ring_test_helper(ring);
3975                 if (r)
3976                         return r;
3977         }
3978
3979         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3980                 ring = &adev->gfx.compute_ring[i];
3981                 amdgpu_ring_test_helper(ring);
3982         }
3983
3984         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3985
3986         return 0;
3987 }
3988
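/* Mirror the DF address-hashing status into TCP_ADDR_CONFIG; only needed on
 * GC 9.4.1 and 9.4.2.
 */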
3989 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3990 {
3991         u32 tmp;
3992
3993         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3994             amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3995                 return;
3996
3997         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3998         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3999                                 adev->df.hash_status.hash_64k);
4000         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4001                                 adev->df.hash_status.hash_2m);
4002         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4003                                 adev->df.hash_status.hash_1g);
4004         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4005 }
4006
4007 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4008 {
4009         if (adev->gfx.num_gfx_rings)
4010                 gfx_v9_0_cp_gfx_enable(adev, enable);
4011         gfx_v9_0_cp_compute_enable(adev, enable);
4012 }
4013
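/* hw_init: set up the cleaner shader, program golden registers and constants,
 * resume the RLC and the CP, and apply the power brake sequence on GC 9.4.2.
 */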
4014 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4015 {
4016         int r;
4017         struct amdgpu_device *adev = ip_block->adev;
4018
4019         amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4020                                        adev->gfx.cleaner_shader_ptr);
4021
4022         if (!amdgpu_sriov_vf(adev))
4023                 gfx_v9_0_init_golden_registers(adev);
4024
4025         gfx_v9_0_constants_init(adev);
4026
4027         gfx_v9_0_init_tcp_config(adev);
4028
4029         r = adev->gfx.rlc.funcs->resume(adev);
4030         if (r)
4031                 return r;
4032
4033         r = gfx_v9_0_cp_resume(adev);
4034         if (r)
4035                 return r;
4036
4037         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4038                 gfx_v9_4_2_set_power_brake_sequence(adev);
4039
4040         return r;
4041 }
4042
4043 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4044 {
4045         struct amdgpu_device *adev = ip_block->adev;
4046
4047         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4048                 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4049         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4050         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4051         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4052
4053         /* DF freeze and KCQ disable will fail if a RAS fatal-error interrupt was triggered */
4054         if (!amdgpu_ras_intr_triggered())
4055                 /* disable the KCQ so the CPC does not touch memory that is no longer valid */
4056                 amdgpu_gfx_disable_kcq(adev, 0);
4057
4058         if (amdgpu_sriov_vf(adev)) {
4059                 gfx_v9_0_cp_gfx_enable(adev, false);
4060                 /* Polling must be disabled for SR-IOV once the hw is finished; otherwise
4061                  * the CPC engine may keep fetching the WB address, which is already
4062                  * invalid after the sw is finished, and trigger a DMAR read error on
4063                  * the hypervisor side.
4064                  */
4065                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4066                 return 0;
4067         }
4068
4069         /* Use the deinitialize sequence from CAIL when unbinding the device from
4070          * the driver, otherwise the KIQ hangs when binding back.
4071          */
4072         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4073                 mutex_lock(&adev->srbm_mutex);
4074                 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4075                                 adev->gfx.kiq[0].ring.pipe,
4076                                 adev->gfx.kiq[0].ring.queue, 0, 0);
4077                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4078                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4079                 mutex_unlock(&adev->srbm_mutex);
4080         }
4081
4082         gfx_v9_0_cp_enable(adev, false);
4083
4084         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4085         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4086             (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4087                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4088                 return 0;
4089         }
4090
4091         adev->gfx.rlc.funcs->stop(adev);
4092         return 0;
4093 }
4094
4095 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4096 {
4097         return gfx_v9_0_hw_fini(ip_block);
4098 }
4099
4100 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4101 {
4102         return gfx_v9_0_hw_init(ip_block);
4103 }
4104
4105 static bool gfx_v9_0_is_idle(void *handle)
4106 {
4107         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4108
4109         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4110                                 GRBM_STATUS, GUI_ACTIVE))
4111                 return false;
4112         else
4113                 return true;
4114 }
4115
4116 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4117 {
4118         unsigned i;
4119         struct amdgpu_device *adev = ip_block->adev;
4120
4121         for (i = 0; i < adev->usec_timeout; i++) {
4122                 if (gfx_v9_0_is_idle(adev))
4123                         return 0;
4124                 udelay(1);
4125         }
4126         return -ETIMEDOUT;
4127 }
4128
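/* Check the GRBM status registers and, if any GFX/CP/RLC block is busy,
 * pulse the corresponding bits in GRBM_SOFT_RESET.
 */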
4129 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4130 {
4131         u32 grbm_soft_reset = 0;
4132         u32 tmp;
4133         struct amdgpu_device *adev = ip_block->adev;
4134
4135         /* GRBM_STATUS */
4136         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4137         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4138                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4139                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4140                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4141                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4142                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4143                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4144                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4145                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4146                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4147         }
4148
4149         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4150                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4151                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4152         }
4153
4154         /* GRBM_STATUS2 */
4155         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4156         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4157                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4158                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4159
4160
4161         if (grbm_soft_reset) {
4162                 /* stop the rlc */
4163                 adev->gfx.rlc.funcs->stop(adev);
4164
4165                 if (adev->gfx.num_gfx_rings)
4166                         /* Disable GFX parsing/prefetching */
4167                         gfx_v9_0_cp_gfx_enable(adev, false);
4168
4169                 /* Disable MEC parsing/prefetching */
4170                 gfx_v9_0_cp_compute_enable(adev, false);
4171
4172                 if (grbm_soft_reset) {
4173                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4174                         tmp |= grbm_soft_reset;
4175                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4176                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4177                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4178
4179                         udelay(50);
4180
4181                         tmp &= ~grbm_soft_reset;
4182                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4183                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4184                 }
4185
4186                 /* Wait a little for things to settle down */
4187                 udelay(50);
4188         }
4189         return 0;
4190 }
4191
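/* Read the 64-bit GPU clock counter with a KIQ COPY_DATA packet that writes
 * it into a writeback slot; used when direct register access is not possible
 * (e.g. under SR-IOV runtime).
 */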
4192 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4193 {
4194         signed long r, cnt = 0;
4195         unsigned long flags;
4196         uint32_t seq, reg_val_offs = 0;
4197         uint64_t value = 0;
4198         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4199         struct amdgpu_ring *ring = &kiq->ring;
4200
4201         BUG_ON(!ring->funcs->emit_rreg);
4202
4203         spin_lock_irqsave(&kiq->ring_lock, flags);
4204         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4205                 pr_err("critical bug! too many kiq readers\n");
4206                 goto failed_unlock;
4207         }
4208         amdgpu_ring_alloc(ring, 32);
4209         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4210         amdgpu_ring_write(ring, 9 |     /* src: register */
4211                                 (5 << 8) |      /* dst: memory */
4212                                 (1 << 16) |     /* count sel */
4213                                 (1 << 20));     /* write confirm */
4214         amdgpu_ring_write(ring, 0);
4215         amdgpu_ring_write(ring, 0);
4216         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4217                                 reg_val_offs * 4));
4218         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4219                                 reg_val_offs * 4));
4220         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4221         if (r)
4222                 goto failed_undo;
4223
4224         amdgpu_ring_commit(ring);
4225         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4226
4227         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4228
4229         /* Don't wait any longer in the GPU reset case, because doing so may
4230          * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
4231          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4232          * never return if we keep waiting in virt_kiq_rreg, which causes
4233          * gpu_recover() to hang there.
4234          *
4235          * Also don't wait any longer in IRQ context.
4236          */
4237         if (r < 1 && (amdgpu_in_reset(adev)))
4238                 goto failed_kiq_read;
4239
4240         might_sleep();
4241         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4242                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4243                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4244         }
4245
4246         if (cnt > MAX_KIQ_REG_TRY)
4247                 goto failed_kiq_read;
4248
4249         mb();
4250         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4251                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4252         amdgpu_device_wb_free(adev, reg_val_offs);
4253         return value;
4254
4255 failed_undo:
4256         amdgpu_ring_undo(ring);
4257 failed_unlock:
4258         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4259 failed_kiq_read:
4260         if (reg_val_offs)
4261                 amdgpu_device_wb_free(adev, reg_val_offs);
4262         pr_err("failed to read gpu clock\n");
4263         return ~0;
4264 }
4265
4266 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4267 {
4268         uint64_t clock, clock_lo, clock_hi, hi_check;
4269
4270         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4271         case IP_VERSION(9, 3, 0):
4272                 preempt_disable();
4273                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4274                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4275                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4276                 /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
4277                  * carries over roughly every 42 seconds.
4278                  */
4279                 if (hi_check != clock_hi) {
4280                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4281                         clock_hi = hi_check;
4282                 }
4283                 preempt_enable();
4284                 clock = clock_lo | (clock_hi << 32ULL);
4285                 break;
4286         default:
4287                 amdgpu_gfx_off_ctrl(adev, false);
4288                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4289                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4290                             IP_VERSION(9, 0, 1) &&
4291                     amdgpu_sriov_runtime(adev)) {
4292                         clock = gfx_v9_0_kiq_read_clock(adev);
4293                 } else {
4294                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4295                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4296                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4297                 }
4298                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4299                 amdgpu_gfx_off_ctrl(adev, true);
4300                 break;
4301         }
4302         return clock;
4303 }
4304
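/* Emit the register writes that bind the given VMID to its GDS, GWS and OA
 * allocations.
 */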
4305 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4306                                           uint32_t vmid,
4307                                           uint32_t gds_base, uint32_t gds_size,
4308                                           uint32_t gws_base, uint32_t gws_size,
4309                                           uint32_t oa_base, uint32_t oa_size)
4310 {
4311         struct amdgpu_device *adev = ring->adev;
4312
4313         /* GDS Base */
4314         gfx_v9_0_write_data_to_reg(ring, 0, false,
4315                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4316                                    gds_base);
4317
4318         /* GDS Size */
4319         gfx_v9_0_write_data_to_reg(ring, 0, false,
4320                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4321                                    gds_size);
4322
4323         /* GWS */
4324         gfx_v9_0_write_data_to_reg(ring, 0, false,
4325                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4326                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4327
4328         /* OA */
4329         gfx_v9_0_write_data_to_reg(ring, 0, false,
4330                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4331                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4332 }
4333
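/* Raw GFX9 compute shader machine code used by the EDC GPR workarounds below
 * to write every VGPR/SGPR bank once.
 */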
4334 static const u32 vgpr_init_compute_shader[] =
4335 {
4336         0xb07c0000, 0xbe8000ff,
4337         0x000000f8, 0xbf110800,
4338         0x7e000280, 0x7e020280,
4339         0x7e040280, 0x7e060280,
4340         0x7e080280, 0x7e0a0280,
4341         0x7e0c0280, 0x7e0e0280,
4342         0x80808800, 0xbe803200,
4343         0xbf84fff5, 0xbf9c0000,
4344         0xd28c0001, 0x0001007f,
4345         0xd28d0001, 0x0002027e,
4346         0x10020288, 0xb8810904,
4347         0xb7814000, 0xd1196a01,
4348         0x00000301, 0xbe800087,
4349         0xbefc00c1, 0xd89c4000,
4350         0x00020201, 0xd89cc080,
4351         0x00040401, 0x320202ff,
4352         0x00000800, 0x80808100,
4353         0xbf84fff8, 0x7e020280,
4354         0xbf810000, 0x00000000,
4355 };
4356
4357 static const u32 sgpr_init_compute_shader[] =
4358 {
4359         0xb07c0000, 0xbe8000ff,
4360         0x0000005f, 0xbee50080,
4361         0xbe812c65, 0xbe822c65,
4362         0xbe832c65, 0xbe842c65,
4363         0xbe852c65, 0xb77c0005,
4364         0x80808500, 0xbf84fff8,
4365         0xbe800080, 0xbf810000,
4366 };
4367
4368 static const u32 vgpr_init_compute_shader_arcturus[] = {
4369         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4370         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4371         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4372         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4373         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4374         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4375         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4376         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4377         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4378         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4379         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4380         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4381         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4382         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4383         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4384         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4385         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4386         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4387         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4388         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4389         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4390         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4391         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4392         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4393         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4394         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4395         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4396         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4397         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4398         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4399         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4400         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4401         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4402         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4403         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4404         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4405         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4406         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4407         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4408         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4409         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4410         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4411         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4412         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4413         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4414         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4415         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4416         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4417         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4418         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4419         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4420         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4421         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4422         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4423         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4424         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4425         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4426         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4427         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4428         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4429         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4430         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4431         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4432         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4433         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4434         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4435         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4436         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4437         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4438         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4439         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4440         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4441         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4442         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4443         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4444         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4445         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4446         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4447         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4448         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4449         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4450         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4451         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4452         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4453         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4454         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4455         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4456         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4457         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4458         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4459         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4460         0xbf84fff8, 0xbf810000,
4461 };
4462
4463 /* When the register arrays below are changed, please update gpr_reg_size
4464  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4465  * to cover all gfx9 ASICs */
4466 static const struct soc15_reg_entry vgpr_init_regs[] = {
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4481 };
4482
4483 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4492    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4493    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4494    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4498 };
4499
4500 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4509    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4510    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4511    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4515 };
4516
4517 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4518    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4519    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4520    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4523    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4524    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4525    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4526    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4527    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4528    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4529    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4530    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4531    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4532 };
4533
4534 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4535    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4536    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4537    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4538    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4539    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4540    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4541    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4542    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4543    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4544    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4545    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4546    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4547    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4548    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4549    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4550    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4551    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4552    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4553    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4554    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4555    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4556    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4557    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4558    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4559    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4560    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4561    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4562    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4563    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4564    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4565    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4566    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4567    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4568 };
4569
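/* Write the whole GDS once through a DMA_DATA packet as part of the GDS EDC
 * workaround; only runs when GFX RAS is enabled.
 */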
4570 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4571 {
4572         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4573         int i, r;
4574
4575         /* only support when RAS is enabled */
4576         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4577                 return 0;
4578
4579         r = amdgpu_ring_alloc(ring, 7);
4580         if (r) {
4581                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4582                         ring->name, r);
4583                 return r;
4584         }
4585
4586         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4587         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4588
4589         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4590         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4591                                 PACKET3_DMA_DATA_DST_SEL(1) |
4592                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4593                                 PACKET3_DMA_DATA_ENGINE(0)));
4594         amdgpu_ring_write(ring, 0);
4595         amdgpu_ring_write(ring, 0);
4596         amdgpu_ring_write(ring, 0);
4597         amdgpu_ring_write(ring, 0);
4598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4599                                 adev->gds.gds_size);
4600
4601         amdgpu_ring_commit(ring);
4602
4603         for (i = 0; i < adev->usec_timeout; i++) {
4604                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4605                         break;
4606                 udelay(1);
4607         }
4608
4609         if (i >= adev->usec_timeout)
4610                 r = -ETIMEDOUT;
4611
4612         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4613
4614         return r;
4615 }
4616
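/* Build and submit an IB that dispatches the VGPR and SGPR init shaders so
 * every GPR bank gets written, as part of the GPR EDC workaround; only runs
 * when GFX RAS is enabled and the compute ring is ready.
 */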
4617 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4618 {
4619         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4620         struct amdgpu_ib ib;
4621         struct dma_fence *f = NULL;
4622         int r, i;
4623         unsigned total_size, vgpr_offset, sgpr_offset;
4624         u64 gpu_addr;
4625
4626         int compute_dim_x = adev->gfx.config.max_shader_engines *
4627                                                 adev->gfx.config.max_cu_per_sh *
4628                                                 adev->gfx.config.max_sh_per_se;
4629         int sgpr_work_group_size = 5;
4630         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4631         int vgpr_init_shader_size;
4632         const u32 *vgpr_init_shader_ptr;
4633         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4634
4635         /* only support when RAS is enabled */
4636         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4637                 return 0;
4638
4639         /* bail if the compute ring is not ready */
4640         if (!ring->sched.ready)
4641                 return 0;
4642
4643         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4644                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4645                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4646                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4647         } else {
4648                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4649                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4650                 vgpr_init_regs_ptr = vgpr_init_regs;
4651         }
4652
4653         total_size =
4654                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4655         total_size +=
4656                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4657         total_size +=
4658                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4659         total_size = ALIGN(total_size, 256);
4660         vgpr_offset = total_size;
4661         total_size += ALIGN(vgpr_init_shader_size, 256);
4662         sgpr_offset = total_size;
4663         total_size += sizeof(sgpr_init_compute_shader);
4664
4665         /* allocate an indirect buffer to put the commands in */
4666         memset(&ib, 0, sizeof(ib));
4667         r = amdgpu_ib_get(adev, NULL, total_size,
4668                                         AMDGPU_IB_POOL_DIRECT, &ib);
4669         if (r) {
4670                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4671                 return r;
4672         }
4673
4674         /* load the compute shaders */
4675         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4676                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4677
4678         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4679                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4680
4681         /* init the ib length to 0 */
4682         ib.length_dw = 0;
4683
4684         /* VGPR */
4685         /* write the register state for the compute dispatch */
4686         for (i = 0; i < gpr_reg_size; i++) {
4687                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4688                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4689                                                                 - PACKET3_SET_SH_REG_START;
4690                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4691         }
4692         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4693         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4694         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4695         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4696                                                         - PACKET3_SET_SH_REG_START;
4697         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4698         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4699
4700         /* write dispatch packet */
4701         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4702         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4703         ib.ptr[ib.length_dw++] = 1; /* y */
4704         ib.ptr[ib.length_dw++] = 1; /* z */
4705         ib.ptr[ib.length_dw++] =
4706                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4707
4708         /* write CS partial flush packet */
4709         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4710         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4711
4712         /* SGPR1 */
4713         /* write the register state for the compute dispatch */
4714         for (i = 0; i < gpr_reg_size; i++) {
4715                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4716                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4717                                                                 - PACKET3_SET_SH_REG_START;
4718                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4719         }
4720         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4721         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4722         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4723         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4724                                                         - PACKET3_SET_SH_REG_START;
4725         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4726         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4727
4728         /* write dispatch packet */
4729         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4730         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4731         ib.ptr[ib.length_dw++] = 1; /* y */
4732         ib.ptr[ib.length_dw++] = 1; /* z */
4733         ib.ptr[ib.length_dw++] =
4734                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4735
4736         /* write CS partial flush packet */
4737         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4738         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4739
4740         /* SGPR2 */
4741         /* write the register state for the compute dispatch */
4742         for (i = 0; i < gpr_reg_size; i++) {
4743                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4744                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4745                                                                 - PACKET3_SET_SH_REG_START;
4746                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4747         }
4748         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4749         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4750         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4751         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4752                                                         - PACKET3_SET_SH_REG_START;
4753         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4754         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4755
4756         /* write dispatch packet */
4757         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4758         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4759         ib.ptr[ib.length_dw++] = 1; /* y */
4760         ib.ptr[ib.length_dw++] = 1; /* z */
4761         ib.ptr[ib.length_dw++] =
4762                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4763
4764         /* write CS partial flush packet */
4765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4766         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4767
4768         /* schedule the IB on the ring */
4769         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4770         if (r) {
4771                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4772                 goto fail;
4773         }
4774
4775         /* wait for the GPU to finish processing the IB */
4776         r = dma_fence_wait(f, false);
4777         if (r) {
4778                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4779                 goto fail;
4780         }
4781
4782 fail:
4783         amdgpu_ib_free(adev, &ib, NULL);
4784         dma_fence_put(f);
4785
4786         return r;
4787 }
4788
4789 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4790 {
4791         struct amdgpu_device *adev = ip_block->adev;
4792
4793         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4794
4795         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4796             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4797                 adev->gfx.num_gfx_rings = 0;
4798         else
4799                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4800         adev->gfx.xcc_mask = 1;
4801         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4802                                           AMDGPU_MAX_COMPUTE_RINGS);
4803         gfx_v9_0_set_kiq_pm4_funcs(adev);
4804         gfx_v9_0_set_ring_funcs(adev);
4805         gfx_v9_0_set_irq_funcs(adev);
4806         gfx_v9_0_set_gds_init(adev);
4807         gfx_v9_0_set_rlc_funcs(adev);
4808
4809         /* init rlcg reg access ctrl */
4810         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4811
4812         return gfx_v9_0_init_microcode(adev);
4813 }
4814
4815 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4816 {
4817         struct amdgpu_device *adev = ip_block->adev;
4818         int r;
4819
4820         /*
4821          * Temporary workaround: on several cards the CP firmware fails to
4822          * update the read pointer while CPDMA writes the clearing
4823          * operation to GDS during the suspend/resume sequence, so only
4824          * run this operation during the cold boot sequence.
4825          */
4826         if ((!adev->in_suspend) &&
4827             (adev->gds.gds_size)) {
4828                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4829                 if (r)
4830                         return r;
4831         }
4832
4833         /* requires IBs so do in late init after IB pool is initialized */
4834         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4835                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4836         else
4837                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4838
4839         if (r)
4840                 return r;
4841
4842         if (adev->gfx.ras &&
4843             adev->gfx.ras->enable_watchdog_timer)
4844                 adev->gfx.ras->enable_watchdog_timer(adev);
4845
4846         return 0;
4847 }
4848
4849 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4850 {
4851         struct amdgpu_device *adev = ip_block->adev;
4852         int r;
4853
4854         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4855         if (r)
4856                 return r;
4857
4858         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4859         if (r)
4860                 return r;
4861
4862         r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4863         if (r)
4864                 return r;
4865
4866         r = gfx_v9_0_ecc_late_init(ip_block);
4867         if (r)
4868                 return r;
4869
4870         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4871                 gfx_v9_4_2_debug_trap_config_init(adev,
4872                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4873         else
4874                 gfx_v9_0_debug_trap_config_init(adev,
4875                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4876
4877         return 0;
4878 }
4879
4880 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4881 {
4882         uint32_t rlc_setting;
4883
4884         /* if RLC is not enabled, do nothing */
4885         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4886         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4887                 return false;
4888
4889         return true;
4890 }
4891
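/* Request RLC safe mode and poll until the RLC acknowledges the command. */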
4892 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4893 {
4894         uint32_t data;
4895         unsigned i;
4896
4897         data = RLC_SAFE_MODE__CMD_MASK;
4898         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4899         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4900
4901         /* wait for RLC_SAFE_MODE */
4902         for (i = 0; i < adev->usec_timeout; i++) {
4903                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4904                         break;
4905                 udelay(1);
4906         }
4907 }
4908
4909 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4910 {
4911         uint32_t data;
4912
4913         data = RLC_SAFE_MODE__CMD_MASK;
4914         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4915 }
4916
4917 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4918                                                 bool enable)
4919 {
4920         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4921
4922         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4923                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4924                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4925                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4926         } else {
4927                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4928                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4929                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4930         }
4931
4932         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4933 }
4934
4935 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4936                                                 bool enable)
4937 {
4938         /* TODO: double check if we need to perform this under safe mode */
4939         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4940
4941         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4942                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4943         else
4944                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4945
4946         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4947                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4948         else
4949                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4950
4951         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4952 }
4953
4954 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4955                                                       bool enable)
4956 {
4957         uint32_t data, def;
4958
4959         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4960
4961         /* It is disabled by HW by default */
4962         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4963                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4964                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4965
4966                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4967                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4968
4969                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4970                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4971                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4972
4973                 /* only for Vega10 & Raven1 */
4974                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4975
4976                 if (def != data)
4977                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4978
4979                 /* MGLS is a global flag to control all MGLS in GFX */
4980                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4981                         /* 2 - RLC memory Light sleep */
4982                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4983                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4984                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4985                                 if (def != data)
4986                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4987                         }
4988                         /* 3 - CP memory Light sleep */
4989                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4990                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4991                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4992                                 if (def != data)
4993                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4994                         }
4995                 }
4996         } else {
4997                 /* 1 - MGCG_OVERRIDE */
4998                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4999
5000                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
5001                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5002
5003                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5004                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5005                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5006                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5007
5008                 if (def != data)
5009                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5010
5011                 /* 2 - disable MGLS in RLC */
5012                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5013                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5014                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5015                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5016                 }
5017
5018                 /* 3 - disable MGLS in CP */
5019                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5020                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5021                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5022                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5023                 }
5024         }
5025
5026         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5027 }
5028
5029 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5030                                            bool enable)
5031 {
5032         uint32_t data, def;
5033
5034         if (!adev->gfx.num_gfx_rings)
5035                 return;
5036
5037         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5038
5039         /* Enable 3D CGCG/CGLS */
5040         if (enable) {
5041                 /* write cmd to clear cgcg/cgls ov */
5042                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5043                 /* unset CGCG override */
5044                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5045                 /* update CGCG and CGLS override bits */
5046                 if (def != data)
5047                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5048
5049                 /* enable 3Dcgcg FSM(0x0000363f) */
5050                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5051
5052                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5053                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5054                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5055                 else
5056                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5057
5058                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5059                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5060                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5061                 if (def != data)
5062                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5063
5064                 /* set IDLE_POLL_COUNT(0x00900100) */
5065                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5066                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5067                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5068                 if (def != data)
5069                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5070         } else {
5071                 /* Disable CGCG/CGLS */
5072                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5073                 /* disable cgcg, cgls should be disabled */
5074                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5075                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5076                 /* disable cgcg and cgls in FSM */
5077                 if (def != data)
5078                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5079         }
5080
5081         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5082 }
5083
5084 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5085                                                       bool enable)
5086 {
5087         uint32_t def, data;
5088
5089         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5090
5091         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5092                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5093                 /* unset CGCG override */
5094                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5095                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5096                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5097                 else
5098                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5099                 /* update CGCG and CGLS override bits */
5100                 if (def != data)
5101                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5102
5103                 /* enable cgcg FSM(0x0000363F) */
5104                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5105
5106                 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5107                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5108                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5109                 else
5110                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5111                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5112                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5113                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5114                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5115                 if (def != data)
5116                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5117
5118                 /* set IDLE_POLL_COUNT(0x00900100) */
5119                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5120                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5121                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5122                 if (def != data)
5123                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5124         } else {
5125                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5126                 /* reset CGCG/CGLS bits */
5127                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5128                 /* disable cgcg and cgls in FSM */
5129                 if (def != data)
5130                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5131         }
5132
5133         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5134 }
5135
5136 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5137                                             bool enable)
5138 {
5139         if (enable) {
5140                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5141                  * ===  MGCG + MGLS ===
5142                  */
5143                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5144                 /* ===  CGCG /CGLS for GFX 3D Only === */
5145                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5146                 /* ===  CGCG + CGLS === */
5147                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5148         } else {
5149                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5150                  * ===  CGCG + CGLS ===
5151                  */
5152                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5153                 /* ===  CGCG /CGLS for GFX 3D Only === */
5154                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5155                 /* ===  MGCG + MGLS === */
5156                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5157         }
5158         return 0;
5159 }
5160
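/*
 * Select the VMID used for RLC SPM by programming the RLC_SPM_VMID
 * field of mmRLC_SPM_MC_CNTL; in SR-IOV pp_one_vf mode the register is
 * accessed with the NO_KIQ variants.
 */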
5161 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5162                                               unsigned int vmid)
5163 {
5164         u32 reg, data;
5165
5166         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5167         if (amdgpu_sriov_is_pp_one_vf(adev))
5168                 data = RREG32_NO_KIQ(reg);
5169         else
5170                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5171
5172         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5173         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5174
5175         if (amdgpu_sriov_is_pp_one_vf(adev))
5176                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5177         else
5178                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5179 }
5180
5181 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5182 {
5183         amdgpu_gfx_off_ctrl(adev, false);
5184
5185         gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5186
5187         amdgpu_gfx_off_ctrl(adev, true);
5188 }
5189
5190 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5191                                         uint32_t offset,
5192                                         struct soc15_reg_rlcg *entries, int arr_size)
5193 {
5194         int i;
5195         uint32_t reg;
5196
5197         if (!entries)
5198                 return false;
5199
5200         for (i = 0; i < arr_size; i++) {
5201                 const struct soc15_reg_rlcg *entry;
5202
5203                 entry = &entries[i];
5204                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5205                 if (offset == reg)
5206                         return true;
5207         }
5208
5209         return false;
5210 }
5211
5212 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5213 {
5214         return gfx_v9_0_check_rlcg_range(adev, offset,
5215                                         (void *)rlcg_access_gc_9_0,
5216                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5217 }
5218
5219 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5220         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5221         .set_safe_mode = gfx_v9_0_set_safe_mode,
5222         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5223         .init = gfx_v9_0_rlc_init,
5224         .get_csb_size = gfx_v9_0_get_csb_size,
5225         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5226         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5227         .resume = gfx_v9_0_rlc_resume,
5228         .stop = gfx_v9_0_rlc_stop,
5229         .reset = gfx_v9_0_rlc_reset,
5230         .start = gfx_v9_0_rlc_start,
5231         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5232         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5233 };
5234
5235 static int gfx_v9_0_set_powergating_state(void *handle,
5236                                           enum amd_powergating_state state)
5237 {
5238         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239         bool enable = (state == AMD_PG_STATE_GATE);
5240
5241         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5242         case IP_VERSION(9, 2, 2):
5243         case IP_VERSION(9, 1, 0):
5244         case IP_VERSION(9, 3, 0):
5245                 if (!enable)
5246                         amdgpu_gfx_off_ctrl(adev, false);
5247
5248                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5249                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5250                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5251                 } else {
5252                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5253                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5254                 }
5255
5256                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5257                         gfx_v9_0_enable_cp_power_gating(adev, true);
5258                 else
5259                         gfx_v9_0_enable_cp_power_gating(adev, false);
5260
5261                 /* update gfx cgpg state */
5262                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5263
5264                 /* update mgcg state */
5265                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5266
5267                 if (enable)
5268                         amdgpu_gfx_off_ctrl(adev, true);
5269                 break;
5270         case IP_VERSION(9, 2, 1):
5271                 amdgpu_gfx_off_ctrl(adev, enable);
5272                 break;
5273         default:
5274                 break;
5275         }
5276
5277         return 0;
5278 }
5279
5280 static int gfx_v9_0_set_clockgating_state(void *handle,
5281                                           enum amd_clockgating_state state)
5282 {
5283         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5284
5285         if (amdgpu_sriov_vf(adev))
5286                 return 0;
5287
5288         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5289         case IP_VERSION(9, 0, 1):
5290         case IP_VERSION(9, 2, 1):
5291         case IP_VERSION(9, 4, 0):
5292         case IP_VERSION(9, 2, 2):
5293         case IP_VERSION(9, 1, 0):
5294         case IP_VERSION(9, 4, 1):
5295         case IP_VERSION(9, 3, 0):
5296         case IP_VERSION(9, 4, 2):
5297                 gfx_v9_0_update_gfx_clock_gating(adev,
5298                                                  state == AMD_CG_STATE_GATE);
5299                 break;
5300         default:
5301                 break;
5302         }
5303         return 0;
5304 }
5305
5306 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5307 {
5308         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5309         int data;
5310
5311         if (amdgpu_sriov_vf(adev))
5312                 *flags = 0;
5313
5314         /* AMD_CG_SUPPORT_GFX_MGCG */
5315         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5316         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5317                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5318
5319         /* AMD_CG_SUPPORT_GFX_CGCG */
5320         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5321         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5322                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5323
5324         /* AMD_CG_SUPPORT_GFX_CGLS */
5325         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5326                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5327
5328         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5329         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5330         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5331                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5332
5333         /* AMD_CG_SUPPORT_GFX_CP_LS */
5334         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5335         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5336                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5337
5338         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5339                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5340                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5341                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5342                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5343
5344                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5345                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5346                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5347         }
5348 }
5349
5350 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5351 {
5352         return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5353 }
5354
5355 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5356 {
5357         struct amdgpu_device *adev = ring->adev;
5358         u64 wptr;
5359
5360         /* XXX check if swapping is necessary on BE */
5361         if (ring->use_doorbell) {
5362                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5363         } else {
5364                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5365                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5366         }
5367
5368         return wptr;
5369 }
5370
5371 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5372 {
5373         struct amdgpu_device *adev = ring->adev;
5374
5375         if (ring->use_doorbell) {
5376                 /* XXX check if swapping is necessary on BE */
5377                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5378                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5379         } else {
5380                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5381                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5382         }
5383 }
5384
5385 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5386 {
5387         struct amdgpu_device *adev = ring->adev;
5388         u32 ref_and_mask, reg_mem_engine;
5389         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5390
5391         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5392                 switch (ring->me) {
5393                 case 1:
5394                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5395                         break;
5396                 case 2:
5397                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5398                         break;
5399                 default:
5400                         return;
5401                 }
5402                 reg_mem_engine = 0;
5403         } else {
5404                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5405                 reg_mem_engine = 1; /* pfp */
5406         }
5407
5408         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5409                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5410                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5411                               ref_and_mask, ref_and_mask, 0x20);
5412 }
5413
5414 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5415                                         struct amdgpu_job *job,
5416                                         struct amdgpu_ib *ib,
5417                                         uint32_t flags)
5418 {
5419         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5420         u32 header, control = 0;
5421
5422         if (ib->flags & AMDGPU_IB_FLAG_CE)
5423                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5424         else
5425                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5426
5427         control |= ib->length_dw | (vmid << 24);
5428
5429         if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5430                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5431
5432                 if (flags & AMDGPU_IB_PREEMPTED)
5433                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5434
5435                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5436                         gfx_v9_0_ring_emit_de_meta(ring,
5437                                                    (!amdgpu_sriov_vf(ring->adev) &&
5438                                                    flags & AMDGPU_IB_PREEMPTED) ?
5439                                                    true : false,
5440                                                    job->gds_size > 0 && job->gds_base != 0);
5441         }
5442
5443         amdgpu_ring_write(ring, header);
5444         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5445         amdgpu_ring_write(ring,
5446 #ifdef __BIG_ENDIAN
5447                 (2 << 0) |
5448 #endif
5449                 lower_32_bits(ib->gpu_addr));
5450         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5451         amdgpu_ring_ib_on_emit_cntl(ring);
5452         amdgpu_ring_write(ring, control);
5453 }
5454
5455 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5456                                      unsigned offset)
5457 {
5458         u32 control = ring->ring[offset];
5459
5460         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5461         ring->ring[offset] = control;
5462 }
5463
5464 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5465                                         unsigned offset)
5466 {
5467         struct amdgpu_device *adev = ring->adev;
5468         void *ce_payload_cpu_addr;
5469         uint64_t payload_offset, payload_size;
5470
5471         payload_size = sizeof(struct v9_ce_ib_state);
5472
5473         if (ring->is_mes_queue) {
5474                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5475                                           gfx[0].gfx_meta_data) +
5476                         offsetof(struct v9_gfx_meta_data, ce_payload);
5477                 ce_payload_cpu_addr =
5478                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5479         } else {
5480                 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5481                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5482         }
5483
5484         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5485                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5486         } else {
5487                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5488                        (ring->buf_mask + 1 - offset) << 2);
5489                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5490                 memcpy((void *)&ring->ring[0],
5491                        ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5492                        payload_size);
5493         }
5494 }
5495
5496 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5497                                         unsigned offset)
5498 {
5499         struct amdgpu_device *adev = ring->adev;
5500         void *de_payload_cpu_addr;
5501         uint64_t payload_offset, payload_size;
5502
5503         payload_size = sizeof(struct v9_de_ib_state);
5504
5505         if (ring->is_mes_queue) {
5506                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5507                                           gfx[0].gfx_meta_data) +
5508                         offsetof(struct v9_gfx_meta_data, de_payload);
5509                 de_payload_cpu_addr =
5510                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5511         } else {
5512                 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5513                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5514         }
5515
5516         ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5517                 IB_COMPLETION_STATUS_PREEMPTED;
5518
5519         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5520                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5521         } else {
5522                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5523                        (ring->buf_mask + 1 - offset) << 2);
5524                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5525                 memcpy((void *)&ring->ring[0],
5526                        de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5527                        payload_size);
5528         }
5529 }
5530
5531 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5532                                           struct amdgpu_job *job,
5533                                           struct amdgpu_ib *ib,
5534                                           uint32_t flags)
5535 {
5536         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5537         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5538
5539         /* Currently there is a high chance of a wave ID mismatch between
5540          * ME and GDS, leading to a hw deadlock, because ME generates
5541          * different wave IDs than the GDS expects. This situation happens
5542          * randomly when at least 5 compute pipes use GDS ordered append.
5543          * The wave IDs generated by ME are also wrong after suspend/resume.
5544          * Those are probably bugs somewhere else in the kernel driver.
5545          *
5546          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5547          * GDS to 0 for this ring (me/pipe).
5548          */
5549         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5550                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5551                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5552                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5553         }
5554
5555         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5556         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5557         amdgpu_ring_write(ring,
5558 #ifdef __BIG_ENDIAN
5559                                 (2 << 0) |
5560 #endif
5561                                 lower_32_bits(ib->gpu_addr));
5562         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5563         amdgpu_ring_write(ring, control);
5564 }
5565
5566 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5567                                      u64 seq, unsigned flags)
5568 {
5569         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5570         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5571         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5572         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5573         uint32_t dw2 = 0;
5574
5575         /* RELEASE_MEM - flush caches, send int */
5576         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5577
5578         if (writeback) {
5579                 dw2 = EOP_TC_NC_ACTION_EN;
5580         } else {
5581                 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5582                                 EOP_TC_MD_ACTION_EN;
5583         }
5584         dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5585                                 EVENT_INDEX(5);
5586         if (exec)
5587                 dw2 |= EOP_EXEC;
5588
5589         amdgpu_ring_write(ring, dw2);
5590         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5591
5592         /*
5593          * the address must be Qword aligned for a 64bit write and Dword
5594          * aligned when only the low 32bit data is sent (data high discarded)
5595          */
5596         if (write64bit)
5597                 BUG_ON(addr & 0x7);
5598         else
5599                 BUG_ON(addr & 0x3);
5600         amdgpu_ring_write(ring, lower_32_bits(addr));
5601         amdgpu_ring_write(ring, upper_32_bits(addr));
5602         amdgpu_ring_write(ring, lower_32_bits(seq));
5603         amdgpu_ring_write(ring, upper_32_bits(seq));
5604         amdgpu_ring_write(ring, 0);
5605 }
5606
5607 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5608 {
5609         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5610         uint32_t seq = ring->fence_drv.sync_seq;
5611         uint64_t addr = ring->fence_drv.gpu_addr;
5612
5613         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5614                               lower_32_bits(addr), upper_32_bits(addr),
5615                               seq, 0xffffffff, 4);
5616 }
5617
5618 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5619                                         unsigned vmid, uint64_t pd_addr)
5620 {
5621         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5622
5623         /* compute doesn't have PFP */
5624         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5625                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5626                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5627                 amdgpu_ring_write(ring, 0x0);
5628         }
5629 }
5630
5631 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5632 {
5633         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5634 }
5635
5636 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5637 {
5638         u64 wptr;
5639
5640         /* XXX check if swapping is necessary on BE */
5641         if (ring->use_doorbell)
5642                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5643         else
5644                 BUG();
5645         return wptr;
5646 }
5647
5648 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5649 {
5650         struct amdgpu_device *adev = ring->adev;
5651
5652         /* XXX check if swapping is necessary on BE */
5653         if (ring->use_doorbell) {
5654                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5655                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5656         } else {
5657                 BUG(); /* only DOORBELL method supported on gfx9 now */
5658         }
5659 }
5660
5661 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5662                                          u64 seq, unsigned int flags)
5663 {
5664         struct amdgpu_device *adev = ring->adev;
5665
5666         /* we only allocate 32bit for each seq wb address */
5667         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5668
5669         /* write fence seq to the "addr" */
5670         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5673         amdgpu_ring_write(ring, lower_32_bits(addr));
5674         amdgpu_ring_write(ring, upper_32_bits(addr));
5675         amdgpu_ring_write(ring, lower_32_bits(seq));
5676
5677         if (flags & AMDGPU_FENCE_FLAG_INT) {
5678                 /* set register to trigger INT */
5679                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5680                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5681                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5682                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5683                 amdgpu_ring_write(ring, 0);
5684                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5685         }
5686 }
5687
5688 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5689 {
5690         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5691         amdgpu_ring_write(ring, 0);
5692 }
5693
5694 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5695 {
5696         struct amdgpu_device *adev = ring->adev;
5697         struct v9_ce_ib_state ce_payload = {0};
5698         uint64_t offset, ce_payload_gpu_addr;
5699         void *ce_payload_cpu_addr;
5700         int cnt;
5701
5702         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5703
5704         if (ring->is_mes_queue) {
5705                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5706                                   gfx[0].gfx_meta_data) +
5707                         offsetof(struct v9_gfx_meta_data, ce_payload);
5708                 ce_payload_gpu_addr =
5709                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5710                 ce_payload_cpu_addr =
5711                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5712         } else {
5713                 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5714                 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5715                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5716         }
5717
5718         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5719         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5720                                  WRITE_DATA_DST_SEL(8) |
5721                                  WR_CONFIRM) |
5722                                  WRITE_DATA_CACHE_POLICY(0));
5723         amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5724         amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5725
5726         amdgpu_ring_ib_on_emit_ce(ring);
5727
5728         if (resume)
5729                 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5730                                            sizeof(ce_payload) >> 2);
5731         else
5732                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5733                                            sizeof(ce_payload) >> 2);
5734 }
5735
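/*
 * Preempt the IB currently running on a gfx ring: assert the preemption
 * condition, emit a trailing fence, ask the KIQ to preempt the queue
 * without unmapping it (PREEMPT_QUEUES_NO_UNMAP), then poll the
 * trailing fence to confirm that preemption completed.
 */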
5736 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5737 {
5738         int i, r = 0;
5739         struct amdgpu_device *adev = ring->adev;
5740         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5741         struct amdgpu_ring *kiq_ring = &kiq->ring;
5742         unsigned long flags;
5743
5744         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5745                 return -EINVAL;
5746
5747         spin_lock_irqsave(&kiq->ring_lock, flags);
5748
5749         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5750                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5751                 return -ENOMEM;
5752         }
5753
5754         /* assert preemption condition */
5755         amdgpu_ring_set_preempt_cond_exec(ring, false);
5756
5757         ring->trail_seq += 1;
5758         amdgpu_ring_alloc(ring, 13);
5759         gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5760                                  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5761
5762         /* assert IB preemption, emit the trailing fence */
5763         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5764                                    ring->trail_fence_gpu_addr,
5765                                    ring->trail_seq);
5766
5767         amdgpu_ring_commit(kiq_ring);
5768         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5769
5770         /* poll the trailing fence */
5771         for (i = 0; i < adev->usec_timeout; i++) {
5772                 if (ring->trail_seq ==
5773                         le32_to_cpu(*ring->trail_fence_cpu_addr))
5774                         break;
5775                 udelay(1);
5776         }
5777
5778         if (i >= adev->usec_timeout) {
5779                 r = -EINVAL;
5780                 DRM_WARN("ring %d timed out preempting ib\n", ring->idx);
5781         }
5782
5783         /* reset the CP_VMID_PREEMPT after trailing fence */
5784         amdgpu_ring_emit_wreg(ring,
5785                               SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5786                               0x0);
5787         amdgpu_ring_commit(ring);
5788
5789         /* deassert preemption condition */
5790         amdgpu_ring_set_preempt_cond_exec(ring, true);
5791         return r;
5792 }
5793
5794 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5795 {
5796         struct amdgpu_device *adev = ring->adev;
5797         struct v9_de_ib_state de_payload = {0};
5798         uint64_t offset, gds_addr, de_payload_gpu_addr;
5799         void *de_payload_cpu_addr;
5800         int cnt;
5801
5802         if (ring->is_mes_queue) {
5803                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5804                                   gfx[0].gfx_meta_data) +
5805                         offsetof(struct v9_gfx_meta_data, de_payload);
5806                 de_payload_gpu_addr =
5807                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5808                 de_payload_cpu_addr =
5809                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5810
5811                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5812                                   gfx[0].gds_backup) +
5813                         offsetof(struct v9_gfx_meta_data, de_payload);
5814                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5815         } else {
5816                 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5817                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5818                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5819
5820                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5821                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5822                                  PAGE_SIZE);
5823         }
5824
5825         if (usegds) {
5826                 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5827                 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5828         }
5829
5830         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5831         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5832         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5833                                  WRITE_DATA_DST_SEL(8) |
5834                                  WR_CONFIRM) |
5835                                  WRITE_DATA_CACHE_POLICY(0));
5836         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5837         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5838
5839         amdgpu_ring_ib_on_emit_de(ring);
5840         if (resume)
5841                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5842                                            sizeof(de_payload) >> 2);
5843         else
5844                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5845                                            sizeof(de_payload) >> 2);
5846 }
5847
5848 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5849                                    bool secure)
5850 {
5851         uint32_t v = secure ? FRAME_TMZ : 0;
5852
5853         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5854         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5855 }
5856
5857 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5858 {
5859         uint32_t dw2 = 0;
5860
5861         gfx_v9_0_ring_emit_ce_meta(ring,
5862                                    (!amdgpu_sriov_vf(ring->adev) &&
5863                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
5864
5865         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5866         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5867                 /* set load_global_config & load_global_uconfig */
5868                 dw2 |= 0x8001;
5869                 /* set load_cs_sh_regs */
5870                 dw2 |= 0x01000000;
5871                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5872                 dw2 |= 0x10002;
5873
5874                 /* set load_ce_ram if a preamble is present */
5875                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5876                         dw2 |= 0x10000000;
5877         } else {
5878                 /* still load_ce_ram the first time a preamble is presented,
5879                  * even though no context switch happens.
5880                  */
5881                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5882                         dw2 |= 0x10000000;
5883         }
5884
5885         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5886         amdgpu_ring_write(ring, dw2);
5887         amdgpu_ring_write(ring, 0);
5888 }
5889
5890 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5891                                                   uint64_t addr)
5892 {
5893         unsigned ret;
5894         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5895         amdgpu_ring_write(ring, lower_32_bits(addr));
5896         amdgpu_ring_write(ring, upper_32_bits(addr));
5897         /* discard following DWs if *cond_exec_gpu_addr==0 */
5898         amdgpu_ring_write(ring, 0);
5899         ret = ring->wptr & ring->buf_mask;
5900         /* patch dummy value later */
5901         amdgpu_ring_write(ring, 0);
5902         return ret;
5903 }
5904
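/* Read a register from the ring: COPY_DATA copies the register contents into
 * the writeback buffer slot at @reg_val_offs, where the CPU can pick it up.
 */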
5905 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5906                                     uint32_t reg_val_offs)
5907 {
5908         struct amdgpu_device *adev = ring->adev;
5909
5910         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5911         amdgpu_ring_write(ring, 0 |     /* src: register */
5912                                 (5 << 8) |      /* dst: memory */
5913                                 (1 << 20));     /* write confirm */
5914         amdgpu_ring_write(ring, reg);
5915         amdgpu_ring_write(ring, 0);
5916         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5917                                 reg_val_offs * 4));
5918         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5919                                 reg_val_offs * 4));
5920 }
5921
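/* Write @val to @reg from the ring via WRITE_DATA. The control word depends on
 * the ring type: GFX rings select a different CP engine and request write
 * confirmation, the KIQ uses the no-increment addressing mode, and all other
 * rings simply request write confirmation.
 */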
5922 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5923                                     uint32_t val)
5924 {
5925         uint32_t cmd = 0;
5926
5927         switch (ring->funcs->type) {
5928         case AMDGPU_RING_TYPE_GFX:
5929                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5930                 break;
5931         case AMDGPU_RING_TYPE_KIQ:
5932                 cmd = (1 << 16); /* no inc addr */
5933                 break;
5934         default:
5935                 cmd = WR_CONFIRM;
5936                 break;
5937         }
5938         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5939         amdgpu_ring_write(ring, cmd);
5940         amdgpu_ring_write(ring, reg);
5941         amdgpu_ring_write(ring, 0);
5942         amdgpu_ring_write(ring, val);
5943 }
5944
5945 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5946                                         uint32_t val, uint32_t mask)
5947 {
5948         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5949 }
5950
5951 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5952                                                   uint32_t reg0, uint32_t reg1,
5953                                                   uint32_t ref, uint32_t mask)
5954 {
5955         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5956         struct amdgpu_device *adev = ring->adev;
5957         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5958                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5959
5960         if (fw_version_ok)
5961                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5962                                       ref, mask, 0x20);
5963         else
5964                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5965                                                            ref, mask);
5966 }
5967
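/* Soft recovery: broadcast an SQ_CMD with CHECK_VMID set to target the waves
 * owned by @vmid (CMD 0x3 presumably requests a kill), with RLC safe mode held
 * around the register write.
 */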
5968 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5969 {
5970         struct amdgpu_device *adev = ring->adev;
5971         uint32_t value = 0;
5972
5973         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5974         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5975         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5976         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5977         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5978         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5979         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5980 }
5981
5982 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5983                                                  enum amdgpu_interrupt_state state)
5984 {
5985         switch (state) {
5986         case AMDGPU_IRQ_STATE_DISABLE:
5987         case AMDGPU_IRQ_STATE_ENABLE:
5988                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5989                                TIME_STAMP_INT_ENABLE,
5990                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5991                 break;
5992         default:
5993                 break;
5994         }
5995 }
5996
5997 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5998                                                      int me, int pipe,
5999                                                      enum amdgpu_interrupt_state state)
6000 {
6001         u32 mec_int_cntl, mec_int_cntl_reg;
6002
6003         /*
6004          * amdgpu controls only the first MEC. That's why this function only
6005          * handles the setting of interrupts for this specific MEC. All other
6006          * pipes' interrupts are set by amdkfd.
6007          */
6008
6009         if (me == 1) {
6010                 switch (pipe) {
6011                 case 0:
6012                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6013                         break;
6014                 case 1:
6015                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6016                         break;
6017                 case 2:
6018                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6019                         break;
6020                 case 3:
6021                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6022                         break;
6023                 default:
6024                         DRM_DEBUG("invalid pipe %d\n", pipe);
6025                         return;
6026                 }
6027         } else {
6028                 DRM_DEBUG("invalid me %d\n", me);
6029                 return;
6030         }
6031
6032         switch (state) {
6033         case AMDGPU_IRQ_STATE_DISABLE:
6034                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6035                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6036                                              TIME_STAMP_INT_ENABLE, 0);
6037                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6038                 break;
6039         case AMDGPU_IRQ_STATE_ENABLE:
6040                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6041                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6042                                              TIME_STAMP_INT_ENABLE, 1);
6043                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6044                 break;
6045         default:
6046                 break;
6047         }
6048 }
6049
6050 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6051                                      int me, int pipe)
6052 {
6053         /*
6054          * amdgpu controls only the first MEC, so this helper only returns the
6055          * interrupt control register for that MEC's pipes. All other pipes'
6056          * interrupts are set by amdkfd.
6057          */
6058         if (me != 1)
6059                 return 0;
6060
6061         switch (pipe) {
6062         case 0:
6063                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6064         case 1:
6065                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6066         case 2:
6067                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6068         case 3:
6069                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6070         default:
6071                 return 0;
6072         }
6073 }
6074
6075 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6076                                              struct amdgpu_irq_src *source,
6077                                              unsigned type,
6078                                              enum amdgpu_interrupt_state state)
6079 {
6080         u32 cp_int_cntl_reg, cp_int_cntl;
6081         int i, j;
6082
6083         switch (state) {
6084         case AMDGPU_IRQ_STATE_DISABLE:
6085         case AMDGPU_IRQ_STATE_ENABLE:
6086                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6087                                PRIV_REG_INT_ENABLE,
6088                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6089                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6090                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6091                                 /* MECs start at 1 */
6092                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6093
6094                                 if (cp_int_cntl_reg) {
6095                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6096                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6097                                                                     PRIV_REG_INT_ENABLE,
6098                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6099                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6100                                 }
6101                         }
6102                 }
6103                 break;
6104         default:
6105                 break;
6106         }
6107
6108         return 0;
6109 }
6110
6111 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6112                                            struct amdgpu_irq_src *source,
6113                                            unsigned type,
6114                                            enum amdgpu_interrupt_state state)
6115 {
6116         u32 cp_int_cntl_reg, cp_int_cntl;
6117         int i, j;
6118
6119         switch (state) {
6120         case AMDGPU_IRQ_STATE_DISABLE:
6121         case AMDGPU_IRQ_STATE_ENABLE:
6122                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6123                                OPCODE_ERROR_INT_ENABLE,
6124                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6125                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6126                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6127                                 /* MECs start at 1 */
6128                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6129
6130                                 if (cp_int_cntl_reg) {
6131                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6132                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6133                                                                     OPCODE_ERROR_INT_ENABLE,
6134                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6135                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6136                                 }
6137                         }
6138                 }
6139                 break;
6140         default:
6141                 break;
6142         }
6143
6144         return 0;
6145 }
6146
6147 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6148                                               struct amdgpu_irq_src *source,
6149                                               unsigned type,
6150                                               enum amdgpu_interrupt_state state)
6151 {
6152         switch (state) {
6153         case AMDGPU_IRQ_STATE_DISABLE:
6154         case AMDGPU_IRQ_STATE_ENABLE:
6155                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6156                                PRIV_INSTR_INT_ENABLE,
6157                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6158                 break;
6159         default:
6160                 break;
6161         }
6162
6163         return 0;
6164 }
6165
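/* Helpers to toggle the CP ECC error interrupt for one MEC pipe; the me/pipe
 * arguments are pasted into the CP_ME<me>_PIPE<pipe>_INT_CNTL register name.
 */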
6166 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
6167         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6168                         CP_ECC_ERROR_INT_ENABLE, 1)
6169
6170 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
6171         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6172                         CP_ECC_ERROR_INT_ENABLE, 0)
6173
6174 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6175                                               struct amdgpu_irq_src *source,
6176                                               unsigned type,
6177                                               enum amdgpu_interrupt_state state)
6178 {
6179         switch (state) {
6180         case AMDGPU_IRQ_STATE_DISABLE:
6181                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6182                                 CP_ECC_ERROR_INT_ENABLE, 0);
6183                 DISABLE_ECC_ON_ME_PIPE(1, 0);
6184                 DISABLE_ECC_ON_ME_PIPE(1, 1);
6185                 DISABLE_ECC_ON_ME_PIPE(1, 2);
6186                 DISABLE_ECC_ON_ME_PIPE(1, 3);
6187                 break;
6188
6189         case AMDGPU_IRQ_STATE_ENABLE:
6190                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6191                                 CP_ECC_ERROR_INT_ENABLE, 1);
6192                 ENABLE_ECC_ON_ME_PIPE(1, 0);
6193                 ENABLE_ECC_ON_ME_PIPE(1, 1);
6194                 ENABLE_ECC_ON_ME_PIPE(1, 2);
6195                 ENABLE_ECC_ON_ME_PIPE(1, 3);
6196                 break;
6197         default:
6198                 break;
6199         }
6200
6201         return 0;
6202 }
6203
6204
6205 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6206                                             struct amdgpu_irq_src *src,
6207                                             unsigned type,
6208                                             enum amdgpu_interrupt_state state)
6209 {
6210         switch (type) {
6211         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6212                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6213                 break;
6214         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6215                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6216                 break;
6217         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6218                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6219                 break;
6220         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6221                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6222                 break;
6223         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6224                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6225                 break;
6226         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6227                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6228                 break;
6229         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6230                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6231                 break;
6232         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6233                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6234                 break;
6235         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6236                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6237                 break;
6238         default:
6239                 break;
6240         }
6241         return 0;
6242 }
6243
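/* EOP interrupt handler. The IH ring_id encodes the source as me (bits 3:2),
 * pipe (bits 1:0) and queue (bits 6:4); that is decoded below to pick the
 * ring(s) whose fences should be processed.
 */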
6244 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6245                             struct amdgpu_irq_src *source,
6246                             struct amdgpu_iv_entry *entry)
6247 {
6248         int i;
6249         u8 me_id, pipe_id, queue_id;
6250         struct amdgpu_ring *ring;
6251
6252         DRM_DEBUG("IH: CP EOP\n");
6253         me_id = (entry->ring_id & 0x0c) >> 2;
6254         pipe_id = (entry->ring_id & 0x03) >> 0;
6255         queue_id = (entry->ring_id & 0x70) >> 4;
6256
6257         switch (me_id) {
6258         case 0:
6259                 if (adev->gfx.num_gfx_rings) {
6260                         if (!adev->gfx.mcbp) {
6261                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6262                         } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6263                                 /* Fence signals are handled on the software rings */
6264                                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6265                                         amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6266                         }
6267                 }
6268                 break;
6269         case 1:
6270         case 2:
6271                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6272                         ring = &adev->gfx.compute_ring[i];
6273                         /* Per-queue interrupt is supported for MEC starting from VI.
6274                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6275                          */
6276                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6277                                 amdgpu_fence_process(ring);
6278                 }
6279                 break;
6280         }
6281         return 0;
6282 }
6283
6284 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6285                            struct amdgpu_iv_entry *entry)
6286 {
6287         u8 me_id, pipe_id, queue_id;
6288         struct amdgpu_ring *ring;
6289         int i;
6290
6291         me_id = (entry->ring_id & 0x0c) >> 2;
6292         pipe_id = (entry->ring_id & 0x03) >> 0;
6293         queue_id = (entry->ring_id & 0x70) >> 4;
6294
6295         switch (me_id) {
6296         case 0:
6297                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6298                 break;
6299         case 1:
6300         case 2:
6301                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6302                         ring = &adev->gfx.compute_ring[i];
6303                         if (ring->me == me_id && ring->pipe == pipe_id &&
6304                             ring->queue == queue_id)
6305                                 drm_sched_fault(&ring->sched);
6306                 }
6307                 break;
6308         }
6309 }
6310
6311 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6312                                  struct amdgpu_irq_src *source,
6313                                  struct amdgpu_iv_entry *entry)
6314 {
6315         DRM_ERROR("Illegal register access in command stream\n");
6316         gfx_v9_0_fault(adev, entry);
6317         return 0;
6318 }
6319
6320 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6321                                struct amdgpu_irq_src *source,
6322                                struct amdgpu_iv_entry *entry)
6323 {
6324         DRM_ERROR("Illegal opcode in command stream\n");
6325         gfx_v9_0_fault(adev, entry);
6326         return 0;
6327 }
6328
6329 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6330                                   struct amdgpu_irq_src *source,
6331                                   struct amdgpu_iv_entry *entry)
6332 {
6333         DRM_ERROR("Illegal instruction in command stream\n");
6334         gfx_v9_0_fault(adev, entry);
6335         return 0;
6336 }
6337
6338
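/* EDC/RAS counter description table: each entry names a GFX sub-block, the
 * counter register it lives in, and the SEC (correctable) and DED
 * (uncorrectable) bit-fields; entries with a zero DED field only expose a
 * single-error-detect count.
 */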
6339 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6340         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6341           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6342           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6343         },
6344         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6345           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6346           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6347         },
6348         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6349           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6350           0, 0
6351         },
6352         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6353           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6354           0, 0
6355         },
6356         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6357           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6358           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6359         },
6360         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6361           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6362           0, 0
6363         },
6364         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6365           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6366           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6367         },
6368         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6369           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6370           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6371         },
6372         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6373           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6374           0, 0
6375         },
6376         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6377           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6378           0, 0
6379         },
6380         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6381           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6382           0, 0
6383         },
6384         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6385           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6386           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6387         },
6388         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6389           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6390           0, 0
6391         },
6392         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6393           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6394           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6395         },
6396         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6397           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6398           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6399           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6400         },
6401         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6402           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6403           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6404           0, 0
6405         },
6406         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6407           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6408           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6409           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6410         },
6411         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6412           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6413           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6414           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6415         },
6416         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6417           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6418           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6419           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6420         },
6421         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6422           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6423           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6424           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6425         },
6426         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6427           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6428           0, 0
6429         },
6430         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6431           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6432           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6433         },
6434         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6435           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6436           0, 0
6437         },
6438         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6439           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6440           0, 0
6441         },
6442         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6443           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6444           0, 0
6445         },
6446         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6447           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6448           0, 0
6449         },
6450         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6451           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6452           0, 0
6453         },
6454         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6455           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6456           0, 0
6457         },
6458         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6459           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6460           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6461         },
6462         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6463           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6464           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6465         },
6466         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6467           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6468           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6469         },
6470         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6471           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6472           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6473         },
6474         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6475           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6476           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6477         },
6478         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6479           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6480           0, 0
6481         },
6482         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6483           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6484           0, 0
6485         },
6486         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6487           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6488           0, 0
6489         },
6490         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6491           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6492           0, 0
6493         },
6494         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6495           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6496           0, 0
6497         },
6498         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6499           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6500           0, 0
6501         },
6502         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6503           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6504           0, 0
6505         },
6506         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6507           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6508           0, 0
6509         },
6510         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6511           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6512           0, 0
6513         },
6514         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6515           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6516           0, 0
6517         },
6518         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6519           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6520           0, 0
6521         },
6522         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6523           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6524           0, 0
6525         },
6526         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6527           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6528           0, 0
6529         },
6530         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6531           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6532           0, 0
6533         },
6534         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6535           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6536           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6537         },
6538         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6539           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6540           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6541         },
6542         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6543           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6544           0, 0
6545         },
6546         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6547           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6548           0, 0
6549         },
6550         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6551           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6552           0, 0
6553         },
6554         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6555           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6556           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6557         },
6558         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6559           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6560           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6561         },
6562         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6563           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6564           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6565         },
6566         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6567           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6568           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6569         },
6570         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6571           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6572           0, 0
6573         },
6574         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6575           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6576           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6577         },
6578         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6579           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6580           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6581         },
6582         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6583           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6584           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6585         },
6586         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6587           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6588           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6589         },
6590         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6591           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6592           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6593         },
6594         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6595           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6596           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6597         },
6598         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6599           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6600           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6601         },
6602         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6603           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6604           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6605         },
6606         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6607           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6608           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6609         },
6610         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6611           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6612           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6613         },
6614         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6615           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6616           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6617         },
6618         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6619           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6620           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6621         },
6622         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6623           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6624           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6625         },
6626         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6627           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6628           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6629         },
6630         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6631           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6632           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6633         },
6634         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6635           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6636           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6637         },
6638         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6639           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6640           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6641         },
6642         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6643           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6644           0, 0
6645         },
6646         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6647           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6648           0, 0
6649         },
6650         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6651           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6652           0, 0
6653         },
6654         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6655           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6656           0, 0
6657         },
6658         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6659           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6660           0, 0
6661         },
6662         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6663           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6664           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6665         },
6666         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6667           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6668           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6669         },
6670         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6671           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6672           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6673         },
6674         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6675           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6676           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6677         },
6678         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6679           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6680           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6681         },
6682         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6683           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6684           0, 0
6685         },
6686         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6687           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6688           0, 0
6689         },
6690         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6691           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6692           0, 0
6693         },
6694         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6695           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6696           0, 0
6697         },
6698         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6699           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6700           0, 0
6701         },
6702         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6703           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6704           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6705         },
6706         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6707           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6708           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6709         },
6710         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6711           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6712           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6713         },
6714         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6715           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6716           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6717         },
6718         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6719           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6720           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6721         },
6722         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6723           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6724           0, 0
6725         },
6726         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6727           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6728           0, 0
6729         },
6730         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6731           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6732           0, 0
6733         },
6734         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6735           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6736           0, 0
6737         },
6738         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6739           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6740           0, 0
6741         },
6742         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6743           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6744           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6745         },
6746         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6747           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6748           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6749         },
6750         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6751           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6752           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6753         },
6754         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6755           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6756           0, 0
6757         },
6758         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6759           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6760           0, 0
6761         },
6762         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6763           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6764           0, 0
6765         },
6766         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6767           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6768           0, 0
6769         },
6770         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6771           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6772           0, 0
6773         },
6774         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6775           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6776           0, 0
6777         }
6778 };
6779
6780 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6781                                      void *inject_if, uint32_t instance_mask)
6782 {
6783         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6784         int ret;
6785         struct ta_ras_trigger_error_input block_info = { 0 };
6786
6787         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6788                 return -EINVAL;
6789
6790         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6791                 return -EINVAL;
6792
6793         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6794                 return -EPERM;
6795
6796         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6797               info->head.type)) {
6798                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6799                         ras_gfx_subblocks[info->head.sub_block_index].name,
6800                         info->head.type);
6801                 return -EPERM;
6802         }
6803
6804         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6805               info->head.type)) {
6806                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6807                         ras_gfx_subblocks[info->head.sub_block_index].name,
6808                         info->head.type);
6809                 return -EPERM;
6810         }
6811
6812         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6813         block_info.sub_block_index =
6814                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6815         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6816         block_info.address = info->address;
6817         block_info.value = info->value;
6818
6819         mutex_lock(&adev->grbm_idx_mutex);
6820         ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6821         mutex_unlock(&adev->grbm_idx_mutex);
6822
6823         return ret;
6824 }
6825
6826 static const char * const vml2_mems[] = {
6827         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6828         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6829         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6830         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6831         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6832         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6833         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6834         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6835         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6836         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6837         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6838         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6839         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6840         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6841         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6842         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6843 };
6844
6845 static const char * const vml2_walker_mems[] = {
6846         "UTC_VML2_CACHE_PDE0_MEM0",
6847         "UTC_VML2_CACHE_PDE0_MEM1",
6848         "UTC_VML2_CACHE_PDE1_MEM0",
6849         "UTC_VML2_CACHE_PDE1_MEM1",
6850         "UTC_VML2_CACHE_PDE2_MEM0",
6851         "UTC_VML2_CACHE_PDE2_MEM1",
6852         "UTC_VML2_RDIF_LOG_FIFO",
6853 };
6854
6855 static const char * const atc_l2_cache_2m_mems[] = {
6856         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6857         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6858         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6859         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6860 };
6861
6862 static const char * const atc_l2_cache_4k_mems[] = {
6863         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6864         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6865         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6866         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6867         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6868         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6869         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6870         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6871         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6872         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6873         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6874         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6875         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6876         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6877         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6878         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6879         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6880         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6881         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6882         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6883         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6884         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6885         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6886         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6887         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6888         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6889         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6890         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6891         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6892         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6893         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6894         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6895 };
6896
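/* Query the UTC (VML2 / ATC L2) EDC status. Each block exposes an INDEX/CNT
 * register pair, so the counters are cleared up front, then every instance is
 * selected in turn and its SEC/DED counts are folded into @err_data.
 */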
6897 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6898                                          struct ras_err_data *err_data)
6899 {
6900         uint32_t i, data;
6901         uint32_t sec_count, ded_count;
6902
6903         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6904         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6905         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6906         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6907         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6908         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6909         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6910         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6911
6912         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6913                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6914                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6915
6916                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6917                 if (sec_count) {
6918                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6919                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6920                         err_data->ce_count += sec_count;
6921                 }
6922
6923                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6924                 if (ded_count) {
6925                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6926                                 "DED %d\n", i, vml2_mems[i], ded_count);
6927                         err_data->ue_count += ded_count;
6928                 }
6929         }
6930
6931         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6932                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6933                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6934
6935                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6936                                                 SEC_COUNT);
6937                 if (sec_count) {
6938                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6939                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6940                         err_data->ce_count += sec_count;
6941                 }
6942
6943                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6944                                                 DED_COUNT);
6945                 if (ded_count) {
6946                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6947                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6948                         err_data->ue_count += ded_count;
6949                 }
6950         }
6951
6952         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6953                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6954                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6955
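                /* Raw masks are used here instead of REG_GET_FIELD; bits
                 * [14:13] presumably hold the SEC count for this bank (DED is
                 * only decoded for the 4K cache below).
                 */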
6956                 sec_count = (data & 0x00006000L) >> 0xd;
6957                 if (sec_count) {
6958                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6959                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6960                                 sec_count);
6961                         err_data->ce_count += sec_count;
6962                 }
6963         }
6964
6965         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6966                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6967                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6968
6969                 sec_count = (data & 0x00006000L) >> 0xd;
6970                 if (sec_count) {
6971                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6972                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6973                                 sec_count);
6974                         err_data->ce_count += sec_count;
6975                 }
6976
6977                 ded_count = (data & 0x00018000L) >> 0xf;
6978                 if (ded_count) {
6979                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6980                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6981                                 ded_count);
6982                         err_data->ue_count += ded_count;
6983                 }
6984         }
6985
6986         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6987         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6988         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6989         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6990
6991         return 0;
6992 }
6993
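/* Decode one EDC counter value: walk gfx_v9_0_ras_fields for every field that
 * lives in @reg, extract its SEC/DED counts from @value, log any non-zero
 * counts and accumulate them into *sec_count / *ded_count.
 */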
6994 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6995         const struct soc15_reg_entry *reg,
6996         uint32_t se_id, uint32_t inst_id, uint32_t value,
6997         uint32_t *sec_count, uint32_t *ded_count)
6998 {
6999         uint32_t i;
7000         uint32_t sec_cnt, ded_cnt;
7001
7002         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
7003                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
7004                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
7005                         gfx_v9_0_ras_fields[i].inst != reg->inst)
7006                         continue;
7007
7008                 sec_cnt = (value &
7009                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
7010                                 gfx_v9_0_ras_fields[i].sec_count_shift;
7011                 if (sec_cnt) {
7012                         dev_info(adev->dev, "GFX SubBlock %s, "
7013                                 "Instance[%d][%d], SEC %d\n",
7014                                 gfx_v9_0_ras_fields[i].name,
7015                                 se_id, inst_id,
7016                                 sec_cnt);
7017                         *sec_count += sec_cnt;
7018                 }
7019
7020                 ded_cnt = (value &
7021                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
7022                                 gfx_v9_0_ras_fields[i].ded_count_shift;
7023                 if (ded_cnt) {
7024                         dev_info(adev->dev, "GFX SubBlock %s, "
7025                                 "Instance[%d][%d], DED %d\n",
7026                                 gfx_v9_0_ras_fields[i].name,
7027                                 se_id, inst_id,
7028                                 ded_cnt);
7029                         *ded_count += ded_cnt;
7030                 }
7031         }
7032
7033         return 0;
7034 }
7035
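/* Reset the GFX EDC counters. The per-SE/instance counters are read back once
 * (reading clears them), and the index-based UTC/ATC L2 counters are walked
 * the same way after being written to zero.
 */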
7036 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7037 {
7038         int i, j, k;
7039
7040         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7041                 return;
7042
7043         /* read back registers to clear the counters */
7044         mutex_lock(&adev->grbm_idx_mutex);
7045         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7046                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7047                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7048                                 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7049                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7050                         }
7051                 }
7052         }
7053         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7054         mutex_unlock(&adev->grbm_idx_mutex);
7055
7056         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7057         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7058         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7059         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7060         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7061         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7062         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7063         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7064
7065         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7066                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7067                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7068         }
7069
7070         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7071                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7072                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7073         }
7074
7075         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7076                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7077                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7078         }
7079
7080         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7081                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7082                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7083         }
7084
7085         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7086         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7087         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7088         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7089 }
7090
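/*
 * Walk all EDC counter registers across every shader engine and instance,
 * accumulating SEC (single-error) counts as correctable errors and DED
 * (double-error) counts as uncorrectable errors, then append the UTC EDC
 * status.
 */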
7091 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7092                                           void *ras_error_status)
7093 {
7094         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7095         uint32_t sec_count = 0, ded_count = 0;
7096         uint32_t i, j, k;
7097         uint32_t reg_value;
7098
7099         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7100                 return;
7101
7102         err_data->ue_count = 0;
7103         err_data->ce_count = 0;
7104
7105         mutex_lock(&adev->grbm_idx_mutex);
7106
7107         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7108                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7109                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7110                                 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7111                                 reg_value =
7112                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7113                                 if (reg_value)
7114                                         gfx_v9_0_ras_error_count(adev,
7115                                                 &gfx_v9_0_edc_counter_regs[i],
7116                                                 j, k, reg_value,
7117                                                 &sec_count, &ded_count);
7118                         }
7119                 }
7120         }
7121
7122         err_data->ce_count += sec_count;
7123         err_data->ue_count += ded_count;
7124
7125         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7126         mutex_unlock(&adev->grbm_idx_mutex);
7127
7128         gfx_v9_0_query_utc_edc_status(adev, err_data);
7129 }
7130
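/*
 * Emit an ACQUIRE_MEM packet covering the full address range that flushes
 * and invalidates the shader I$/K$, TC and TCL1 caches, so memory written
 * by earlier work is visible to subsequent operations.
 */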
7131 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7132 {
7133         const unsigned int cp_coher_cntl =
7134                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7135                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7136                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7137                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7138                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7139
7140         /* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
7141         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7142         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7143         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7144         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7145         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7146         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7147         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7148 }
7149
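/*
 * Program the SPI wave limit for a single compute (CS) pipe: throttle it
 * to the minimum multiplier (0x1) when enabled, or restore the default
 * when disabled.
 */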
7150 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7151                                         uint32_t pipe, bool enable)
7152 {
7153         struct amdgpu_device *adev = ring->adev;
7154         uint32_t val;
7155         uint32_t wcl_cs_reg;
7156
7157         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
7158         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7159
7160         switch (pipe) {
7161         case 0:
7162                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7163                 break;
7164         case 1:
7165                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7166                 break;
7167         case 2:
7168                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7169                 break;
7170         case 3:
7171                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7172                 break;
7173         default:
7174                 DRM_DEBUG("invalid pipe %d\n", pipe);
7175                 return;
7176         }
7177
7178         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7179 }
7180
7181 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7182 {
7183         struct amdgpu_device *adev = ring->adev;
7184         uint32_t val;
7185         int i;
7186
7187
7188         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
7189          * limit the number of gfx waves. Setting the low 5 bits (0x1f) makes
7190          * sure gfx only gets around 25% of the GPU resources.
7191          */
7192         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7193         amdgpu_ring_emit_wreg(ring,
7194                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7195                               val);
7196
7197         /* Restrict waves for normal/low priority compute queues as well
7198          * to get the best QoS for high priority compute jobs.
7199          *
7200          * amdgpu controls only the first ME (CS pipes 0-3).
7201          */
7202         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7203                 if (i != ring->pipe)
7204                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7205
7206         }
7207 }
7208
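/*
 * Pad the ring with num_nop NOP dwords: one variable-length NOP packet for
 * the bulk, then single NOPs for the remainder.
 */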
7209 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7210 {
7211         /* Header itself is a NOP packet */
7212         if (num_nop == 1) {
7213                 amdgpu_ring_write(ring, ring->funcs->nop);
7214                 return;
7215         }
7216
7217         /* Max HW optimization till 0x3ffe, followed by the remaining NOPs one at a time */
7218         amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7219
7220         /* Header is at index 0, followed by num_nop - 1 NOP packets */
7221         amdgpu_ring_insert_nop(ring, num_nop - 1);
7222 }
7223
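/*
 * Reset a kernel gfx queue (KGQ): ask the CP, via the KIQ ring, to reset
 * everything submitted with the given VMID, then fence the gfx ring, wait
 * for CP_VMID_RESET to read back zero, clear the register and re-test the
 * ring. Not supported under SR-IOV.
 */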
7224 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7225 {
7226         struct amdgpu_device *adev = ring->adev;
7227         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7228         struct amdgpu_ring *kiq_ring = &kiq->ring;
7229         unsigned long flags;
7230         u32 tmp;
7231         int r;
7232
7233         if (amdgpu_sriov_vf(adev))
7234                 return -EINVAL;
7235
7236         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7237                 return -EINVAL;
7238
7239         spin_lock_irqsave(&kiq->ring_lock, flags);
7240
7241         if (amdgpu_ring_alloc(kiq_ring, 5)) {
7242                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7243                 return -ENOMEM;
7244         }
7245
7246         tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7247         gfx_v9_0_ring_emit_wreg(kiq_ring,
7248                                  SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7249         amdgpu_ring_commit(kiq_ring);
7250
7251         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7252
7253         r = amdgpu_ring_test_ring(kiq_ring);
7254         if (r)
7255                 return r;
7256
7257         if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7258                 return -ENOMEM;
7259         gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7260                                  ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7261         gfx_v9_0_ring_emit_reg_wait(ring,
7262                                     SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7263         gfx_v9_0_ring_emit_wreg(ring,
7264                                 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7265
7266         return amdgpu_ring_test_ring(ring);
7267 }
7268
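/*
 * Reset a kernel compute queue (KCQ): unmap the queue through the KIQ,
 * wait for its HQD to go inactive, reinitialize the MQD, and remap the
 * queue via the KIQ before re-testing the ring. Not supported under
 * SR-IOV.
 */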
7269 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7270                               unsigned int vmid)
7271 {
7272         struct amdgpu_device *adev = ring->adev;
7273         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7274         struct amdgpu_ring *kiq_ring = &kiq->ring;
7275         unsigned long flags;
7276         int i, r;
7277
7278         if (amdgpu_sriov_vf(adev))
7279                 return -EINVAL;
7280
7281         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7282                 return -EINVAL;
7283
7284         spin_lock_irqsave(&kiq->ring_lock, flags);
7285
7286         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7287                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7288                 return -ENOMEM;
7289         }
7290
7291         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7292                                    0, 0);
7293         amdgpu_ring_commit(kiq_ring);
7294
7295         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7296
7297         r = amdgpu_ring_test_ring(kiq_ring);
7298         if (r)
7299                 return r;
7300
7301         /* make sure dequeue is complete */
7302         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7303         mutex_lock(&adev->srbm_mutex);
7304         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7305         for (i = 0; i < adev->usec_timeout; i++) {
7306                 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7307                         break;
7308                 udelay(1);
7309         }
7310         if (i >= adev->usec_timeout)
7311                 r = -ETIMEDOUT;
7312         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7313         mutex_unlock(&adev->srbm_mutex);
7314         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7315         if (r) {
7316                 dev_err(adev->dev, "failed to wait for hqd deactivation\n");
7317                 return r;
7318         }
7319
7320         r = amdgpu_bo_reserve(ring->mqd_obj, false);
7321         if (unlikely(r != 0)) {
7322                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
7323                 return r;
7324         }
7325         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7326         if (!r) {
7327                 r = gfx_v9_0_kcq_init_queue(ring, true);
7328                 amdgpu_bo_kunmap(ring->mqd_obj);
7329                 ring->mqd_ptr = NULL;
7330         }
7331         amdgpu_bo_unreserve(ring->mqd_obj);
7332         if (r) {
7333                 dev_err(adev->dev, "failed to kmap or init kcq mqd\n");
7334                 return r;
7335         }
7336         spin_lock_irqsave(&kiq->ring_lock, flags);
7337         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7338         if (r) {
7339                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7340                 return -ENOMEM;
7341         }
7342         kiq->pmf->kiq_map_queues(kiq_ring, ring);
7343         amdgpu_ring_commit(kiq_ring);
7344         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7345         r = amdgpu_ring_test_ring(kiq_ring);
7346         if (r) {
7347                 DRM_ERROR("failed to remap the queue\n");
7348                 return r;
7349         }
7350         return amdgpu_ring_test_ring(ring);
7351 }
7352
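/*
 * Print the register state previously captured by gfx_v9_ip_dump(): the
 * core GC registers first, then the per-queue CP registers for every
 * MEC/pipe/queue instance.
 */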
7353 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7354 {
7355         struct amdgpu_device *adev = ip_block->adev;
7356         uint32_t i, j, k, reg, index = 0;
7357         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7358
7359         if (!adev->gfx.ip_dump_core)
7360                 return;
7361
7362         for (i = 0; i < reg_count; i++)
7363                 drm_printf(p, "%-50s \t 0x%08x\n",
7364                            gc_reg_list_9[i].reg_name,
7365                            adev->gfx.ip_dump_core[i]);
7366
7367         /* print compute queue registers for all instances */
7368         if (!adev->gfx.ip_dump_compute_queues)
7369                 return;
7370
7371         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7372         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7373                    adev->gfx.mec.num_mec,
7374                    adev->gfx.mec.num_pipe_per_mec,
7375                    adev->gfx.mec.num_queue_per_pipe);
7376
7377         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7378                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7379                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7380                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7381                                 for (reg = 0; reg < reg_count; reg++) {
7382                                         drm_printf(p, "%-50s \t 0x%08x\n",
7383                                                    gc_cp_reg_list_9[reg].reg_name,
7384                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
7385                                 }
7386                                 index += reg_count;
7387                         }
7388                 }
7389         }
7390
7391 }
7392
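/*
 * Capture the core GC registers and the per-compute-queue CP registers
 * into the ip_dump buffers. GFXOFF is disabled around the reads and each
 * MEC/pipe/queue is selected in turn via soc15_grbm_select().
 */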
7393 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7394 {
7395         struct amdgpu_device *adev = ip_block->adev;
7396         uint32_t i, j, k, reg, index = 0;
7397         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7398
7399         if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7400                 return;
7401
7402         amdgpu_gfx_off_ctrl(adev, false);
7403         for (i = 0; i < reg_count; i++)
7404                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7405         amdgpu_gfx_off_ctrl(adev, true);
7406
7407         /* dump compute queue registers for all instances */
7408         if (!adev->gfx.ip_dump_compute_queues)
7409                 return;
7410
7411         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7412         amdgpu_gfx_off_ctrl(adev, false);
7413         mutex_lock(&adev->srbm_mutex);
7414         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7415                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7416                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7417                                 /* ME0 is for GFX, so compute MEs start from 1 */
7418                                 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7419
7420                                 for (reg = 0; reg < reg_count; reg++) {
7421                                         adev->gfx.ip_dump_compute_queues[index + reg] =
7422                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
7423                                                         gc_cp_reg_list_9[reg]));
7424                                 }
7425                                 index += reg_count;
7426                         }
7427                 }
7428         }
7429         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7430         mutex_unlock(&adev->srbm_mutex);
7431         amdgpu_gfx_off_ctrl(adev, true);
7432
7433 }
7434
7435 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7436 {
7437         /* Emit the cleaner shader */
7438         amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7439         amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7440 }
7441
7442 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7443         .name = "gfx_v9_0",
7444         .early_init = gfx_v9_0_early_init,
7445         .late_init = gfx_v9_0_late_init,
7446         .sw_init = gfx_v9_0_sw_init,
7447         .sw_fini = gfx_v9_0_sw_fini,
7448         .hw_init = gfx_v9_0_hw_init,
7449         .hw_fini = gfx_v9_0_hw_fini,
7450         .suspend = gfx_v9_0_suspend,
7451         .resume = gfx_v9_0_resume,
7452         .is_idle = gfx_v9_0_is_idle,
7453         .wait_for_idle = gfx_v9_0_wait_for_idle,
7454         .soft_reset = gfx_v9_0_soft_reset,
7455         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7456         .set_powergating_state = gfx_v9_0_set_powergating_state,
7457         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7458         .dump_ip_state = gfx_v9_ip_dump,
7459         .print_ip_state = gfx_v9_ip_print,
7460 };
7461
7462 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7463         .type = AMDGPU_RING_TYPE_GFX,
7464         .align_mask = 0xff,
7465         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7466         .support_64bit_ptrs = true,
7467         .secure_submission_supported = true,
7468         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7469         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7470         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7471         .emit_frame_size = /* 242 dwords maximum if 16 IBs */
7472                 5 +  /* COND_EXEC */
7473                 7 +  /* PIPELINE_SYNC */
7474                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7475                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7476                 2 + /* VM_FLUSH */
7477                 8 +  /* FENCE for VM_FLUSH */
7478                 20 + /* GDS switch */
7479                 4 + /* double SWITCH_BUFFER,
7480                      * the first COND_EXEC jumps to the place just
7481                      * prior to this double SWITCH_BUFFER */
7482                 5 + /* COND_EXEC */
7483                 7 +      /*     HDP_flush */
7484                 4 +      /*     VGT_flush */
7485                 14 + /* CE_META */
7486                 31 + /* DE_META */
7487                 3 + /* CNTX_CTRL */
7488                 5 + /* HDP_INVL */
7489                 8 + 8 + /* FENCE x2 */
7490                 2 + /* SWITCH_BUFFER */
7491                 7 + /* gfx_v9_0_emit_mem_sync */
7492                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7493         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7494         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7495         .emit_fence = gfx_v9_0_ring_emit_fence,
7496         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7497         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7498         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7499         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7500         .test_ring = gfx_v9_0_ring_test_ring,
7501         .insert_nop = gfx_v9_ring_insert_nop,
7502         .pad_ib = amdgpu_ring_generic_pad_ib,
7503         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7504         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7505         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7506         .preempt_ib = gfx_v9_0_ring_preempt_ib,
7507         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7508         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7509         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7510         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7511         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7512         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7513         .reset = gfx_v9_0_reset_kgq,
7514         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7515         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7516         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7517 };
7518
7519 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7520         .type = AMDGPU_RING_TYPE_GFX,
7521         .align_mask = 0xff,
7522         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7523         .support_64bit_ptrs = true,
7524         .secure_submission_supported = true,
7525         .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7526         .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7527         .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7528         .emit_frame_size = /* 242 dwords maximum if 16 IBs */
7529                 5 +  /* COND_EXEC */
7530                 7 +  /* PIPELINE_SYNC */
7531                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7532                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7533                 2 + /* VM_FLUSH */
7534                 8 +  /* FENCE for VM_FLUSH */
7535                 20 + /* GDS switch */
7536                 4 + /* double SWITCH_BUFFER,
7537                      * the first COND_EXEC jumps to the place just
7538                      * prior to this double SWITCH_BUFFER
7539                      */
7540                 5 + /* COND_EXEC */
7541                 7 +      /*     HDP_flush */
7542                 4 +      /*     VGT_flush */
7543                 14 + /* CE_META */
7544                 31 + /* DE_META */
7545                 3 + /* CNTX_CTRL */
7546                 5 + /* HDP_INVL */
7547                 8 + 8 + /* FENCE x2 */
7548                 2 + /* SWITCH_BUFFER */
7549                 7 + /* gfx_v9_0_emit_mem_sync */
7550                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7551         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7552         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7553         .emit_fence = gfx_v9_0_ring_emit_fence,
7554         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7555         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7556         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7557         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7558         .test_ring = gfx_v9_0_ring_test_ring,
7559         .test_ib = gfx_v9_0_ring_test_ib,
7560         .insert_nop = gfx_v9_ring_insert_nop,
7561         .pad_ib = amdgpu_ring_generic_pad_ib,
7562         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7563         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7564         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7565         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7566         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7567         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7568         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7569         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7570         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7571         .patch_cntl = gfx_v9_0_ring_patch_cntl,
7572         .patch_de = gfx_v9_0_ring_patch_de_meta,
7573         .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7574         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7575         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7576         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7577 };
7578
7579 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7580         .type = AMDGPU_RING_TYPE_COMPUTE,
7581         .align_mask = 0xff,
7582         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7583         .support_64bit_ptrs = true,
7584         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7585         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7586         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7587         .emit_frame_size =
7588                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7589                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7590                 5 + /* hdp invalidate */
7591                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7592                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7593                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7594                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7595                 7 + /* gfx_v9_0_emit_mem_sync */
7596                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7597                 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7598                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7599         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7600         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7601         .emit_fence = gfx_v9_0_ring_emit_fence,
7602         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7603         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7604         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7605         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7606         .test_ring = gfx_v9_0_ring_test_ring,
7607         .test_ib = gfx_v9_0_ring_test_ib,
7608         .insert_nop = gfx_v9_ring_insert_nop,
7609         .pad_ib = amdgpu_ring_generic_pad_ib,
7610         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7611         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7612         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7613         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7614         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7615         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7616         .reset = gfx_v9_0_reset_kcq,
7617         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7618         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7619         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7620 };
7621
7622 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7623         .type = AMDGPU_RING_TYPE_KIQ,
7624         .align_mask = 0xff,
7625         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7626         .support_64bit_ptrs = true,
7627         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7628         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7629         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7630         .emit_frame_size =
7631                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7632                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7633                 5 + /* hdp invalidate */
7634                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7635                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7636                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7637                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7638         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7639         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7640         .test_ring = gfx_v9_0_ring_test_ring,
7641         .insert_nop = amdgpu_ring_insert_nop,
7642         .pad_ib = amdgpu_ring_generic_pad_ib,
7643         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7644         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7645         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7646         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7647 };
7648
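/*
 * Attach the ring function tables: KIQ, hardware gfx rings, the software
 * (muxed) gfx rings used when mid command buffer preemption is enabled,
 * and the compute rings.
 */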
7649 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7650 {
7651         int i;
7652
7653         adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7654
7655         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7656                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7657
7658         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7659                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7660                         adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7661         }
7662
7663         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7664                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7665 }
7666
7667 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7668         .set = gfx_v9_0_set_eop_interrupt_state,
7669         .process = gfx_v9_0_eop_irq,
7670 };
7671
7672 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7673         .set = gfx_v9_0_set_priv_reg_fault_state,
7674         .process = gfx_v9_0_priv_reg_irq,
7675 };
7676
7677 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7678         .set = gfx_v9_0_set_bad_op_fault_state,
7679         .process = gfx_v9_0_bad_op_irq,
7680 };
7681
7682 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7683         .set = gfx_v9_0_set_priv_inst_fault_state,
7684         .process = gfx_v9_0_priv_inst_irq,
7685 };
7686
7687 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7688         .set = gfx_v9_0_set_cp_ecc_error_state,
7689         .process = amdgpu_gfx_cp_ecc_error_irq,
7690 };
7691
7692
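/*
 * Register interrupt sources for EOP, privileged register and instruction
 * faults, bad opcodes and CP ECC errors.
 */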
7693 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7694 {
7695         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7696         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7697
7698         adev->gfx.priv_reg_irq.num_types = 1;
7699         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7700
7701         adev->gfx.bad_op_irq.num_types = 1;
7702         adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7703
7704         adev->gfx.priv_inst_irq.num_types = 1;
7705         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7706
7707         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7708         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7709 }
7710
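/* All supported GC 9.x variants use the common gfx v9.0 RLC callbacks. */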
7711 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7712 {
7713         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7714         case IP_VERSION(9, 0, 1):
7715         case IP_VERSION(9, 2, 1):
7716         case IP_VERSION(9, 4, 0):
7717         case IP_VERSION(9, 2, 2):
7718         case IP_VERSION(9, 1, 0):
7719         case IP_VERSION(9, 4, 1):
7720         case IP_VERSION(9, 3, 0):
7721         case IP_VERSION(9, 4, 2):
7722                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7723                 break;
7724         default:
7725                 break;
7726         }
7727 }
7728
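/*
 * Set up the per-ASIC GDS/GWS/OA sizes and the maximum GDS compute wave
 * id, which differ between Vega, Raven, Arcturus and Aldebaran.
 */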
7729 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7730 {
7731         /* init asic gds info */
7732         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7733         case IP_VERSION(9, 0, 1):
7734         case IP_VERSION(9, 2, 1):
7735         case IP_VERSION(9, 4, 0):
7736                 adev->gds.gds_size = 0x10000;
7737                 break;
7738         case IP_VERSION(9, 2, 2):
7739         case IP_VERSION(9, 1, 0):
7740         case IP_VERSION(9, 4, 1):
7741                 adev->gds.gds_size = 0x1000;
7742                 break;
7743         case IP_VERSION(9, 4, 2):
7744                 /* Aldebaran removed all the internal GDS memory; only GWS
7745                  * opcodes such as barrier and semaphore are supported in
7746                  * the kernel. */
7747                 adev->gds.gds_size = 0;
7748                 break;
7749         default:
7750                 adev->gds.gds_size = 0x10000;
7751                 break;
7752         }
7753
7754         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7755         case IP_VERSION(9, 0, 1):
7756         case IP_VERSION(9, 4, 0):
7757                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7758                 break;
7759         case IP_VERSION(9, 2, 1):
7760                 adev->gds.gds_compute_max_wave_id = 0x27f;
7761                 break;
7762         case IP_VERSION(9, 2, 2):
7763         case IP_VERSION(9, 1, 0):
7764                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7765                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7766                 else
7767                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7768                 break;
7769         case IP_VERSION(9, 4, 1):
7770                 adev->gds.gds_compute_max_wave_id = 0xfff;
7771                 break;
7772         case IP_VERSION(9, 4, 2):
7773                 /* deprecated for Aldebaran, no usage at all */
7774                 adev->gds.gds_compute_max_wave_id = 0;
7775                 break;
7776         default:
7777                 /* this really depends on the chip */
7778                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7779                 break;
7780         }
7781
7782         adev->gds.gws_size = 64;
7783         adev->gds.oa_size = 16;
7784 }
7785
7786 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7787                                                  u32 bitmap)
7788 {
7789         u32 data;
7790
7791         if (!bitmap)
7792                 return;
7793
7794         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7795         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7796
7797         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7798 }
7799
7800 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7801 {
7802         u32 data, mask;
7803
7804         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7805         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7806
7807         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7808         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7809
7810         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7811
7812         return (~data) & mask;
7813 }
7814
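/*
 * Build the per-SE/SH active and always-on CU bitmaps, honouring any
 * user-disabled CUs, and report the total number of active CUs.
 */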
7815 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7816                                  struct amdgpu_cu_info *cu_info)
7817 {
7818         int i, j, k, counter, active_cu_number = 0;
7819         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7820         unsigned disable_masks[4 * 4];
7821
7822         if (!adev || !cu_info)
7823                 return -EINVAL;
7824
7825         /*
7826          * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7827          */
7828         if (adev->gfx.config.max_shader_engines *
7829                 adev->gfx.config.max_sh_per_se > 16)
7830                 return -EINVAL;
7831
7832         amdgpu_gfx_parse_disable_cu(disable_masks,
7833                                     adev->gfx.config.max_shader_engines,
7834                                     adev->gfx.config.max_sh_per_se);
7835
7836         mutex_lock(&adev->grbm_idx_mutex);
7837         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7838                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7839                         mask = 1;
7840                         ao_bitmap = 0;
7841                         counter = 0;
7842                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7843                         gfx_v9_0_set_user_cu_inactive_bitmap(
7844                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7845                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7846
7847                         /*
7848                          * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7849                          * a 4x4 array, which suits Vega ASICs with their
7850                          * 4*2 SE/SH layout.
7851                          * Arcturus, however, changed the SE/SH layout to 8*1.
7852                          * To minimize the impact, we make it compatible with the
7853                          * current bitmap array as below:
7854                          *    SE4,SH0 --> bitmap[0][1]
7855                          *    SE5,SH0 --> bitmap[1][1]
7856                          *    SE6,SH0 --> bitmap[2][1]
7857                          *    SE7,SH0 --> bitmap[3][1]
7858                          */
7859                         cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7860
7861                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7862                                 if (bitmap & mask) {
7863                                         if (counter < adev->gfx.config.max_cu_per_sh)
7864                                                 ao_bitmap |= mask;
7865                                         counter++;
7866                                 }
7867                                 mask <<= 1;
7868                         }
7869                         active_cu_number += counter;
7870                         if (i < 2 && j < 2)
7871                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7872                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7873                 }
7874         }
7875         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7876         mutex_unlock(&adev->grbm_idx_mutex);
7877
7878         cu_info->number = active_cu_number;
7879         cu_info->ao_cu_mask = ao_cu_mask;
7880         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7881
7882         return 0;
7883 }
7884
7885 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7886 {
7887         .type = AMD_IP_BLOCK_TYPE_GFX,
7888         .major = 9,
7889         .minor = 0,
7890         .rev = 0,
7891         .funcs = &gfx_v9_0_ip_funcs,
7892 };