/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

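/*
 * GC status/control registers captured when dumping gfx v9 IP state for
 * debugging (register snapshot consumed outside this excerpt).
 */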
static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
        SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
        SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
        SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
        SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
        SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
        SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
        SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
        /* cp header registers */
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
        /* SE status registers */
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
        /* compute queue registers */
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
        SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
};

enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

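/*
 * Each AMDGPU_RAS_SUB_BLOCK() entry below packs the per-subblock error-type
 * capability flags into the two bitmask fields of struct ras_gfx_subblock:
 * (a, b, c, d) land in hw_supported_error_type bits 0-3 and (g, e, h, f)
 * land in sw_supported_error_type bits 0-3, as encoded by the macro.
 */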
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

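/*
 * Golden register settings: each SOC15_REG_GOLDEN_VALUE() entry names a
 * register, the bitfield mask to touch, and the value to program during
 * hardware init; one table per ASIC/revision follows.
 */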
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

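/*
 * Word offsets of the RLC_SRM_INDEX_CNTL_ADDR_n / _DATA_n register pairs,
 * expressed relative to the first register of each group so the pairs can
 * be addressed by index.
 */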
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
                                              unsigned int vmid);
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        u64 shader_mc_addr;

        /* Cleaner shader MC address */
        shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask:0, queue_type:0 (KIQ) */
913                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914         amdgpu_ring_write(kiq_ring,
915                         lower_32_bits(queue_mask));     /* queue mask lo */
916         amdgpu_ring_write(kiq_ring,
917                         upper_32_bits(queue_mask));     /* queue mask hi */
918         amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919         amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
921         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
922 }
923
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925                                  struct amdgpu_ring *ring)
926 {
927         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928         uint64_t wptr_addr = ring->wptr_gpu_addr;
929         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930
931         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939                          /*queue_type: normal compute queue */
940                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941                          /* alloc format: all_on_one_pipe */
942                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944                          /* num_queues: must be 1 */
945                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946         amdgpu_ring_write(kiq_ring,
947                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955                                    struct amdgpu_ring *ring,
956                                    enum amdgpu_unmap_queues_action action,
957                                    u64 gpu_addr, u64 seq)
958 {
959         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960
961         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963                           PACKET3_UNMAP_QUEUES_ACTION(action) |
964                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967         amdgpu_ring_write(kiq_ring,
968                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969
970         if (action == PREEMPT_QUEUES_NO_UNMAP) {
971                 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972                 amdgpu_ring_write(kiq_ring, 0);
973                 amdgpu_ring_write(kiq_ring, 0);
974
975         } else {
976                 amdgpu_ring_write(kiq_ring, 0);
977                 amdgpu_ring_write(kiq_ring, 0);
978                 amdgpu_ring_write(kiq_ring, 0);
979         }
980 }
981
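/*
 * Emit a QUERY_STATUS packet for @ring's queue. The @addr/@seq pair gives
 * the CP a location and fence value to write back once the query completes,
 * which is how the KIQ signals completion to the driver.
 */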
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983                                    struct amdgpu_ring *ring,
984                                    u64 addr,
985                                    u64 seq)
986 {
987         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988
989         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990         amdgpu_ring_write(kiq_ring,
991                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993                           PACKET3_QUERY_STATUS_COMMAND(2));
994         /* doorbell offset and engine select */
995         amdgpu_ring_write(kiq_ring,
996                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003
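/*
 * Emit an INVALIDATE_TLBS packet: flush TLB entries for @pasid with the
 * given flush type, optionally on all VM hubs.
 */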
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005                                 uint16_t pasid, uint32_t flush_type,
1006                                 bool all_hub)
1007 {
1008         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009         amdgpu_ring_write(kiq_ring,
1010                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015
1016
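/*
 * Forcibly deactivate a hung compute HQD: select the queue via GRBM, issue
 * a dequeue request and an SPI queue reset, then poll CP_HQD_ACTIVE until
 * the queue goes idle or the timeout expires. RLC safe mode is held around
 * the whole register sequence.
 */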
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018                                         uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019                                         uint32_t xcc_id, uint32_t vmid)
1020 {
1021         struct amdgpu_device *adev = kiq_ring->adev;
1022         unsigned i;
1023
1024         /* enter safe mode */
1025         amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026         mutex_lock(&adev->srbm_mutex);
1027         soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028
1029         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031                 WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032                 /* wait until the dequeue request takes effect */
1033                 for (i = 0; i < adev->usec_timeout; i++) {
1034                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035                                 break;
1036                         udelay(1);
1037                 }
1038                 if (i >= adev->usec_timeout)
1039                         dev_err(adev->dev, "failed to wait for HQD to deactivate\n");
1040         } else {
1041                 dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042         }
1043
1044         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045         mutex_unlock(&adev->srbm_mutex);
1046         /* exit safe mode */
1047         amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054         .kiq_query_status = gfx_v9_0_kiq_query_status,
1055         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056         .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057         .set_resources_size = 8,
1058         .map_queues_size = 7,
1059         .unmap_queues_size = 6,
1060         .query_status_size = 7,
1061         .invalidate_tlbs_size = 2,
1062 };
1063
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066         adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068
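/*
 * Program the per-ASIC "golden" register settings. Renoir returns early
 * since it does not need the common sequence; Arcturus and Aldebaran
 * (9.4.1/9.4.2) also skip golden_settings_gc_9_x_common.
 */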
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072         case IP_VERSION(9, 0, 1):
1073                 soc15_program_register_sequence(adev,
1074                                                 golden_settings_gc_9_0,
1075                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1076                 soc15_program_register_sequence(adev,
1077                                                 golden_settings_gc_9_0_vg10,
1078                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079                 break;
1080         case IP_VERSION(9, 2, 1):
1081                 soc15_program_register_sequence(adev,
1082                                                 golden_settings_gc_9_2_1,
1083                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
1084                 soc15_program_register_sequence(adev,
1085                                                 golden_settings_gc_9_2_1_vg12,
1086                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087                 break;
1088         case IP_VERSION(9, 4, 0):
1089                 soc15_program_register_sequence(adev,
1090                                                 golden_settings_gc_9_0,
1091                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1092                 soc15_program_register_sequence(adev,
1093                                                 golden_settings_gc_9_0_vg20,
1094                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095                 break;
1096         case IP_VERSION(9, 4, 1):
1097                 soc15_program_register_sequence(adev,
1098                                                 golden_settings_gc_9_4_1_arct,
1099                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100                 break;
1101         case IP_VERSION(9, 2, 2):
1102         case IP_VERSION(9, 1, 0):
1103                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104                                                 ARRAY_SIZE(golden_settings_gc_9_1));
1105                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106                         soc15_program_register_sequence(adev,
1107                                                         golden_settings_gc_9_1_rv2,
1108                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109                 else
1110                         soc15_program_register_sequence(adev,
1111                                                         golden_settings_gc_9_1_rv1,
1112                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113                 break;
1114         case IP_VERSION(9, 3, 0):
1115                 soc15_program_register_sequence(adev,
1116                                                 golden_settings_gc_9_1_rn,
1117                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118                 return; /* Renoir doesn't need the common golden settings */
1119         case IP_VERSION(9, 4, 2):
1120                 gfx_v9_4_2_init_golden_registers(adev,
1121                                                  adev->smuio.funcs->get_die_id(adev));
1122                 break;
1123         default:
1124                 break;
1125         }
1126
1127         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128             (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134                                        bool wc, uint32_t reg, uint32_t val)
1135 {
1136         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138                                 WRITE_DATA_DST_SEL(0) |
1139                                 (wc ? WR_CONFIRM : 0));
1140         amdgpu_ring_write(ring, reg);
1141         amdgpu_ring_write(ring, 0);
1142         amdgpu_ring_write(ring, val);
1143 }
1144
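/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & @mask) == @ref; @inv is
 * the poll interval.
 */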
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146                                   int mem_space, int opt, uint32_t addr0,
1147                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1148                                   uint32_t inv)
1149 {
1150         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151         amdgpu_ring_write(ring,
1152                                  /* memory (1) or register (0) */
1153                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1157
1158         if (mem_space)
1159                 BUG_ON(addr0 & 0x3); /* Dword align */
1160         amdgpu_ring_write(ring, addr0);
1161         amdgpu_ring_write(ring, addr1);
1162         amdgpu_ring_write(ring, ref);
1163         amdgpu_ring_write(ring, mask);
1164         amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166
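/*
 * Simple ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
 * poll the register until the value appears or the timeout expires.
 */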
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169         struct amdgpu_device *adev = ring->adev;
1170         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171         uint32_t tmp = 0;
1172         unsigned i;
1173         int r;
1174
1175         WREG32(scratch, 0xCAFEDEAD);
1176         r = amdgpu_ring_alloc(ring, 3);
1177         if (r)
1178                 return r;
1179
1180         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181         amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182         amdgpu_ring_write(ring, 0xDEADBEEF);
1183         amdgpu_ring_commit(ring);
1184
1185         for (i = 0; i < adev->usec_timeout; i++) {
1186                 tmp = RREG32(scratch);
1187                 if (tmp == 0xDEADBEEF)
1188                         break;
1189                 udelay(1);
1190         }
1191
1192         if (i >= adev->usec_timeout)
1193                 r = -ETIMEDOUT;
1194         return r;
1195 }
1196
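/*
 * Simple IB test: submit an indirect buffer that writes 0xDEADBEEF to a
 * writeback slot and check that the value lands before the fence times out.
 */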
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199         struct amdgpu_device *adev = ring->adev;
1200         struct amdgpu_ib ib;
1201         struct dma_fence *f = NULL;
1202
1203         unsigned index;
1204         uint64_t gpu_addr;
1205         uint32_t tmp;
1206         long r;
1207
1208         r = amdgpu_device_wb_get(adev, &index);
1209         if (r)
1210                 return r;
1211
1212         gpu_addr = adev->wb.gpu_addr + (index * 4);
1213         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214         memset(&ib, 0, sizeof(ib));
1215
1216         r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217         if (r)
1218                 goto err1;
1219
1220         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222         ib.ptr[2] = lower_32_bits(gpu_addr);
1223         ib.ptr[3] = upper_32_bits(gpu_addr);
1224         ib.ptr[4] = 0xDEADBEEF;
1225         ib.length_dw = 5;
1226
1227         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228         if (r)
1229                 goto err2;
1230
1231         r = dma_fence_wait_timeout(f, false, timeout);
1232         if (r == 0) {
1233                 r = -ETIMEDOUT;
1234                 goto err2;
1235         } else if (r < 0) {
1236                 goto err2;
1237         }
1238
1239         tmp = adev->wb.wb[index];
1240         if (tmp == 0xDEADBEEF)
1241                 r = 0;
1242         else
1243                 r = -EINVAL;
1244
1245 err2:
1246         amdgpu_ib_free(&ib, NULL);
1247         dma_fence_put(f);
1248 err1:
1249         amdgpu_device_wb_free(adev, index);
1250         return r;
1251 }
1252
1253
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256         amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257         amdgpu_ucode_release(&adev->gfx.me_fw);
1258         amdgpu_ucode_release(&adev->gfx.ce_fw);
1259         amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260         amdgpu_ucode_release(&adev->gfx.mec_fw);
1261         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262
1263         kfree(adev->gfx.rlc.register_list_format);
1264 }
1265
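/*
 * Record whether the loaded CP ME/MEC firmware is new enough to support the
 * combined register write-and-wait operation, and warn once if the firmware
 * is too old.
 */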
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268         adev->gfx.me_fw_write_wait = false;
1269         adev->gfx.mec_fw_write_wait = false;
1270
1271         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273              (adev->gfx.mec_feature_version < 46) ||
1274              (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275              (adev->gfx.pfp_feature_version < 46)))
1276                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1277
1278         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279         case IP_VERSION(9, 0, 1):
1280                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281                     (adev->gfx.me_feature_version >= 42) &&
1282                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283                     (adev->gfx.pfp_feature_version >= 42))
1284                         adev->gfx.me_fw_write_wait = true;
1285
1286                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287                     (adev->gfx.mec_feature_version >= 42))
1288                         adev->gfx.mec_fw_write_wait = true;
1289                 break;
1290         case IP_VERSION(9, 2, 1):
1291                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292                     (adev->gfx.me_feature_version >= 44) &&
1293                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294                     (adev->gfx.pfp_feature_version >= 44))
1295                         adev->gfx.me_fw_write_wait = true;
1296
1297                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298                     (adev->gfx.mec_feature_version >= 44))
1299                         adev->gfx.mec_fw_write_wait = true;
1300                 break;
1301         case IP_VERSION(9, 4, 0):
1302                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303                     (adev->gfx.me_feature_version >= 44) &&
1304                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305                     (adev->gfx.pfp_feature_version >= 44))
1306                         adev->gfx.me_fw_write_wait = true;
1307
1308                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309                     (adev->gfx.mec_feature_version >= 44))
1310                         adev->gfx.mec_fw_write_wait = true;
1311                 break;
1312         case IP_VERSION(9, 1, 0):
1313         case IP_VERSION(9, 2, 2):
1314                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315                     (adev->gfx.me_feature_version >= 42) &&
1316                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317                     (adev->gfx.pfp_feature_version >= 42))
1318                         adev->gfx.me_fw_write_wait = true;
1319
1320                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321                     (adev->gfx.mec_feature_version >= 42))
1322                         adev->gfx.mec_fw_write_wait = true;
1323                 break;
1324         default:
1325                 adev->gfx.me_fw_write_wait = true;
1326                 adev->gfx.mec_fw_write_wait = true;
1327                 break;
1328         }
1329 }
1330
1331 struct amdgpu_gfxoff_quirk {
1332         u16 chip_vendor;
1333         u16 chip_device;
1334         u16 subsys_vendor;
1335         u16 subsys_device;
1336         u8 revision;
1337 };
1338
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346         /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347         { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348         /* https://bbs.openkylin.top/t/topic/171497 */
1349         { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350         /* HP 705G4 DM with R5 2400G */
1351         { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352         { 0, 0, 0, 0, 0 },
1353 };
1354
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358
1359         while (p && p->chip_device != 0) {
1360                 if (pdev->vendor == p->chip_vendor &&
1361                     pdev->device == p->chip_device &&
1362                     pdev->subsystem_vendor == p->subsys_vendor &&
1363                     pdev->subsystem_device == p->subsys_device &&
1364                     pdev->revision == p->revision) {
1365                         return true;
1366                 }
1367                 ++p;
1368         }
1369         return false;
1370 }
1371
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374         if (adev->pm.fw_version >= 0x41e2b)
1375                 return true;
1376         else
1377                 return false;
1378 }
1379
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383             (adev->gfx.me_fw_version >= 0x000000a5) &&
1384             (adev->gfx.me_feature_version >= 52))
1385                 return true;
1386         else
1387                 return false;
1388 }
1389
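/*
 * Decide whether GFXOFF must be disabled (quirked boards, or Raven with RLC
 * firmware that is too old); if GFXOFF remains enabled on an APU, turn on
 * the related GFX powergating flags as well.
 */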
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394
1395         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396         case IP_VERSION(9, 0, 1):
1397         case IP_VERSION(9, 2, 1):
1398         case IP_VERSION(9, 4, 0):
1399                 break;
1400         case IP_VERSION(9, 2, 2):
1401         case IP_VERSION(9, 1, 0):
1402                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404                     ((!is_raven_kicker(adev) &&
1405                       adev->gfx.rlc_fw_version < 531) ||
1406                      (adev->gfx.rlc_feature_version < 1) ||
1407                      !adev->gfx.rlc.is_rlc_v2_1))
1408                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409
1410                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412                                 AMD_PG_SUPPORT_CP |
1413                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1414                 break;
1415         case IP_VERSION(9, 3, 0):
1416                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418                                 AMD_PG_SUPPORT_CP |
1419                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1420                 break;
1421         default:
1422                 break;
1423         }
1424 }
1425
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427                                           char *chip_name)
1428 {
1429         int err;
1430
1431         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432                                    AMDGPU_UCODE_REQUIRED,
1433                                    "amdgpu/%s_pfp.bin", chip_name);
1434         if (err)
1435                 goto out;
1436         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1437
1438         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1439                                    AMDGPU_UCODE_REQUIRED,
1440                                    "amdgpu/%s_me.bin", chip_name);
1441         if (err)
1442                 goto out;
1443         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1444
1445         err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1446                                    AMDGPU_UCODE_REQUIRED,
1447                                    "amdgpu/%s_ce.bin", chip_name);
1448         if (err)
1449                 goto out;
1450         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1451
1452 out:
1453         if (err) {
1454                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1455                 amdgpu_ucode_release(&adev->gfx.me_fw);
1456                 amdgpu_ucode_release(&adev->gfx.ce_fw);
1457         }
1458         return err;
1459 }
1460
1461 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1462                                        char *chip_name)
1463 {
1464         int err;
1465         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1466         uint16_t version_major;
1467         uint16_t version_minor;
1468         uint32_t smu_version;
1469
1470         /*
1471          * For Picasso on AM4 socket boards, use picasso_rlc_am4.bin
1472          * instead of picasso_rlc.bin.
1473          * Detection:
1474          * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
1475          *          or revision >= 0xD8 && revision <= 0xDF;
1476          * otherwise the part is PCO FP5.
1477          */
1478         if (!strcmp(chip_name, "picasso") &&
1479                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1480                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1481                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1482                                            AMDGPU_UCODE_REQUIRED,
1483                                            "amdgpu/%s_rlc_am4.bin", chip_name);
1484         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1485                 (smu_version >= 0x41e2b))
1486                 /*
1487                  * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1488                  */
1489                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1490                                            AMDGPU_UCODE_REQUIRED,
1491                                            "amdgpu/%s_kicker_rlc.bin", chip_name);
1492         else
1493                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1494                                            AMDGPU_UCODE_REQUIRED,
1495                                            "amdgpu/%s_rlc.bin", chip_name);
1496         if (err)
1497                 goto out;
1498
1499         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1500         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1501         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1502         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1503 out:
1504         if (err)
1505                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1506
1507         return err;
1508 }
1509
1510 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1511 {
1512         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1513             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1514             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1515                 return false;
1516
1517         return true;
1518 }
1519
1520 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1521                                               char *chip_name)
1522 {
1523         int err;
1524
1525         if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1526                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1527                                    AMDGPU_UCODE_REQUIRED,
1528                                    "amdgpu/%s_sjt_mec.bin", chip_name);
1529         else
1530                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1531                                            AMDGPU_UCODE_REQUIRED,
1532                                            "amdgpu/%s_mec.bin", chip_name);
1533         if (err)
1534                 goto out;
1535
1536         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1537         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1538
1539         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1540                 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1541                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1542                                                    AMDGPU_UCODE_REQUIRED,
1543                                                    "amdgpu/%s_sjt_mec2.bin", chip_name);
1544                 else
1545                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1546                                                    AMDGPU_UCODE_REQUIRED,
1547                                                    "amdgpu/%s_mec2.bin", chip_name);
1548                 if (!err) {
1549                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1550                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1551                 } else {
1552                         err = 0;
1553                         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1554                 }
1555         } else {
1556                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1557                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1558         }
1559
1560         gfx_v9_0_check_if_need_gfxoff(adev);
1561         gfx_v9_0_check_fw_write_wait(adev);
1562
1563 out:
1564         if (err)
1565                 amdgpu_ucode_release(&adev->gfx.mec_fw);
1566         return err;
1567 }
1568
1569 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1570 {
1571         char ucode_prefix[30];
1572         int r;
1573
1574         DRM_DEBUG("\n");
1575         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1576
1577         /* No CPG in Arcturus */
1578         if (adev->gfx.num_gfx_rings) {
1579                 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1580                 if (r)
1581                         return r;
1582         }
1583
1584         r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1585         if (r)
1586                 return r;
1587
1588         r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1589         if (r)
1590                 return r;
1591
1592         return r;
1593 }
1594
1595 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1596 {
1597         u32 count = 0;
1598         const struct cs_section_def *sect = NULL;
1599         const struct cs_extent_def *ext = NULL;
1600
1601         /* begin clear state */
1602         count += 2;
1603         /* context control state */
1604         count += 3;
1605
1606         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1607                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1608                         if (sect->id == SECT_CONTEXT)
1609                                 count += 2 + ext->reg_count;
1610                         else
1611                                 return 0;
1612                 }
1613         }
1614
1615         /* end clear state */
1616         count += 2;
1617         /* clear state */
1618         count += 2;
1619
1620         return count;
1621 }
1622
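/*
 * Fill the clear-state buffer: preamble begin, context control, the
 * SECT_CONTEXT register extents from the cs_data table, preamble end, and
 * a final CLEAR_STATE packet.
 */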
1623 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1624                                     volatile u32 *buffer)
1625 {
1626         u32 count = 0, i;
1627         const struct cs_section_def *sect = NULL;
1628         const struct cs_extent_def *ext = NULL;
1629
1630         if (adev->gfx.rlc.cs_data == NULL)
1631                 return;
1632         if (buffer == NULL)
1633                 return;
1634
1635         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1636         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1637
1638         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1639         buffer[count++] = cpu_to_le32(0x80000000);
1640         buffer[count++] = cpu_to_le32(0x80000000);
1641
1642         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1643                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1644                         if (sect->id == SECT_CONTEXT) {
1645                                 buffer[count++] =
1646                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1647                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1648                                                 PACKET3_SET_CONTEXT_REG_START);
1649                                 for (i = 0; i < ext->reg_count; i++)
1650                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1651                         } else {
1652                                 return;
1653                         }
1654                 }
1655         }
1656
1657         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1658         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1659
1660         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1661         buffer[count++] = cpu_to_le32(0);
1662 }
1663
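/*
 * Build the per-SE/SH always-on CU masks: the first few CUs of each SH
 * (4 on APUs, 8 on Vega 12, 12 otherwise) stay active for RLC load
 * balancing, and the first two also form the powergating always-on mask.
 */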
1664 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1665 {
1666         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1667         uint32_t pg_always_on_cu_num = 2;
1668         uint32_t always_on_cu_num;
1669         uint32_t i, j, k;
1670         uint32_t mask, cu_bitmap, counter;
1671
1672         if (adev->flags & AMD_IS_APU)
1673                 always_on_cu_num = 4;
1674         else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1675                 always_on_cu_num = 8;
1676         else
1677                 always_on_cu_num = 12;
1678
1679         mutex_lock(&adev->grbm_idx_mutex);
1680         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1681                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1682                         mask = 1;
1683                         cu_bitmap = 0;
1684                         counter = 0;
1685                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1686
1687                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1688                                 if (cu_info->bitmap[0][i][j] & mask) {
1689                                         if (counter == pg_always_on_cu_num)
1690                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1691                                         if (counter < always_on_cu_num)
1692                                                 cu_bitmap |= mask;
1693                                         else
1694                                                 break;
1695                                         counter++;
1696                                 }
1697                                 mask <<= 1;
1698                         }
1699
1700                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1701                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1702                 }
1703         }
1704         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1705         mutex_unlock(&adev->grbm_idx_mutex);
1706 }
1707
1708 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1709 {
1710         uint32_t data;
1711
1712         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1715         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1716         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1717
1718         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1719         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1720
1721         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1722         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1723
1724         mutex_lock(&adev->grbm_idx_mutex);
1725         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1726         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1727         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1728
1729         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1730         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1731         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1732         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1733         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1734
1735         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1736         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1737         data &= 0x0000FFFF;
1738         data |= 0x00C00000;
1739         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1740
1741         /*
1742          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1743          * programmed in gfx_v9_0_init_always_on_cu_mask()
1744          */
1745
1746         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1747          * but is used here for RLC_LB_CNTL configuration */
1748         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1749         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1750         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1751         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1752         mutex_unlock(&adev->grbm_idx_mutex);
1753
1754         gfx_v9_0_init_always_on_cu_mask(adev);
1755 }
1756
1757 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1758 {
1759         uint32_t data;
1760
1761         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1764         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1765         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1766
1767         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1768         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1769
1770         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1771         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1772
1773         mutex_lock(&adev->grbm_idx_mutex);
1774         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1775         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1777
1778         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1779         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1780         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1781         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1782         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1783
1784         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1785         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1786         data &= 0x0000FFFF;
1787         data |= 0x00C00000;
1788         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1789
1790         /*
1791          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1792          * programmed in gfx_v9_0_init_always_on_cu_mask()
1793          */
1794
1795         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1796          * but is used here for RLC_LB_CNTL configuration */
1797         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1798         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1799         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1800         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1801         mutex_unlock(&adev->grbm_idx_mutex);
1802
1803         gfx_v9_0_init_always_on_cu_mask(adev);
1804 }
1805
1806 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1807 {
1808         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1809 }
1810
1811 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1812 {
1813         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1814                 return 5;
1815         else
1816                 return 4;
1817 }
1818
1819 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1820 {
1821         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1822
1823         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1824         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1825         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1826         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1827         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1828         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1829         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1830         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1831         adev->gfx.rlc.rlcg_reg_access_supported = true;
1832 }
1833
1834 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1835 {
1836         const struct cs_section_def *cs_data;
1837         int r;
1838
1839         adev->gfx.rlc.cs_data = gfx9_cs_data;
1840
1841         cs_data = adev->gfx.rlc.cs_data;
1842
1843         if (cs_data) {
1844                 /* init clear state block */
1845                 r = amdgpu_gfx_rlc_init_csb(adev);
1846                 if (r)
1847                         return r;
1848         }
1849
1850         if (adev->flags & AMD_IS_APU) {
1851                 /* TODO: double check the cp_table_size for RV */
1852                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1853                 r = amdgpu_gfx_rlc_init_cpt(adev);
1854                 if (r)
1855                         return r;
1856         }
1857
1858         return 0;
1859 }
1860
1861 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1862 {
1863         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1864         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1865 }
1866
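/*
 * Set up MEC resources: claim the compute queues this driver will use,
 * allocate the HPD EOP buffer shared by all compute rings, and copy the
 * MEC firmware into a GTT buffer object.
 */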
1867 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1868 {
1869         int r;
1870         u32 *hpd;
1871         const __le32 *fw_data;
1872         unsigned fw_size;
1873         u32 *fw;
1874         size_t mec_hpd_size;
1875
1876         const struct gfx_firmware_header_v1_0 *mec_hdr;
1877
1878         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1879
1880         /* take ownership of the relevant compute queues */
1881         amdgpu_gfx_compute_queue_acquire(adev);
1882         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1883         if (mec_hpd_size) {
1884                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1885                                               AMDGPU_GEM_DOMAIN_VRAM |
1886                                               AMDGPU_GEM_DOMAIN_GTT,
1887                                               &adev->gfx.mec.hpd_eop_obj,
1888                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1889                                               (void **)&hpd);
1890                 if (r) {
1891                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1892                         gfx_v9_0_mec_fini(adev);
1893                         return r;
1894                 }
1895
1896                 memset(hpd, 0, mec_hpd_size);
1897
1898                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1899                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1900         }
1901
1902         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1903
1904         fw_data = (const __le32 *)
1905                 (adev->gfx.mec_fw->data +
1906                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1907         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1908
1909         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1910                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1911                                       &adev->gfx.mec.mec_fw_obj,
1912                                       &adev->gfx.mec.mec_fw_gpu_addr,
1913                                       (void **)&fw);
1914         if (r) {
1915                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1916                 gfx_v9_0_mec_fini(adev);
1917                 return r;
1918         }
1919
1920         memcpy(fw, fw_data, fw_size);
1921
1922         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1923         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1924
1925         return 0;
1926 }
1927
1928 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1929 {
1930         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1931                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1932                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1933                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1934                 (SQ_IND_INDEX__FORCE_READ_MASK));
1935         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1936 }
1937
1938 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1939                            uint32_t wave, uint32_t thread,
1940                            uint32_t regno, uint32_t num, uint32_t *out)
1941 {
1942         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1943                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1944                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1945                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1946                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1947                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1948                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1949         while (num--)
1950                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1951 }
1952
1953 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1954 {
1955         /* type 1 wave data */
1956         dst[(*no_fields)++] = 1;
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1960         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1961         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1962         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1963         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1964         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1965         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1966         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1967         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1968         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1969         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1970         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1971         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1972 }
1973
1974 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1975                                      uint32_t wave, uint32_t start,
1976                                      uint32_t size, uint32_t *dst)
1977 {
1978         wave_read_regs(
1979                 adev, simd, wave, 0,
1980                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1981 }
1982
1983 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1984                                      uint32_t wave, uint32_t thread,
1985                                      uint32_t start, uint32_t size,
1986                                      uint32_t *dst)
1987 {
1988         wave_read_regs(
1989                 adev, simd, wave, thread,
1990                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1991 }
1992
1993 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1994                                   u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1995 {
1996         soc15_grbm_select(adev, me, pipe, q, vm, 0);
1997 }
1998
1999 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2000         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2001         .select_se_sh = &gfx_v9_0_select_se_sh,
2002         .read_wave_data = &gfx_v9_0_read_wave_data,
2003         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2004         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2005         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2006 };
2007
2008 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
2009                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2010                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2011                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2012 };
2013
2014 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2015         .ras_block = {
2016                 .hw_ops = &gfx_v9_0_ras_ops,
2017         },
2018 };
2019
2020 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2021 {
2022         u32 gb_addr_config;
2023         int err;
2024
2025         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2026         case IP_VERSION(9, 0, 1):
2027                 adev->gfx.config.max_hw_contexts = 8;
2028                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2029                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2030                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2031                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2032                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2033                 break;
2034         case IP_VERSION(9, 2, 1):
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2041                 DRM_INFO("fix gfx.config for vega12\n");
2042                 break;
2043         case IP_VERSION(9, 4, 0):
2044                 adev->gfx.ras = &gfx_v9_0_ras;
2045                 adev->gfx.config.max_hw_contexts = 8;
2046                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2047                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2048                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2049                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2050                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2051                 gb_addr_config &= ~0xf3e777ff;
2052                 gb_addr_config |= 0x22014042;
2053                 /* check vbios table if gpu info is not available */
2054                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2055                 if (err)
2056                         return err;
2057                 break;
2058         case IP_VERSION(9, 2, 2):
2059         case IP_VERSION(9, 1, 0):
2060                 adev->gfx.config.max_hw_contexts = 8;
2061                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2062                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2063                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2064                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2065                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2066                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2067                 else
2068                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2069                 break;
2070         case IP_VERSION(9, 4, 1):
2071                 adev->gfx.ras = &gfx_v9_4_ras;
2072                 adev->gfx.config.max_hw_contexts = 8;
2073                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2076                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078                 gb_addr_config &= ~0xf3e777ff;
2079                 gb_addr_config |= 0x22014042;
2080                 break;
2081         case IP_VERSION(9, 3, 0):
2082                 adev->gfx.config.max_hw_contexts = 8;
2083                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2084                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2085                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2086                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2087                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2088                 gb_addr_config &= ~0xf3e777ff;
2089                 gb_addr_config |= 0x22010042;
2090                 break;
2091         case IP_VERSION(9, 4, 2):
2092                 adev->gfx.ras = &gfx_v9_4_2_ras;
2093                 adev->gfx.config.max_hw_contexts = 8;
2094                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2095                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2096                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2097                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2098                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2099                 gb_addr_config &= ~0xf3e777ff;
2100                 gb_addr_config |= 0x22014042;
2101                 /* check vbios table if gpu info is not available */
2102                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2103                 if (err)
2104                         return err;
2105                 break;
2106         default:
2107                 BUG();
2108                 break;
2109         }
2110
2111         adev->gfx.config.gb_addr_config = gb_addr_config;
2112
2113         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2114                         REG_GET_FIELD(
2115                                         adev->gfx.config.gb_addr_config,
2116                                         GB_ADDR_CONFIG,
2117                                         NUM_PIPES);
2118
2119         adev->gfx.config.max_tile_pipes =
2120                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2121
2122         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2123                         REG_GET_FIELD(
2124                                         adev->gfx.config.gb_addr_config,
2125                                         GB_ADDR_CONFIG,
2126                                         NUM_BANKS);
2127         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2128                         REG_GET_FIELD(
2129                                         adev->gfx.config.gb_addr_config,
2130                                         GB_ADDR_CONFIG,
2131                                         MAX_COMPRESSED_FRAGS);
2132         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2133                         REG_GET_FIELD(
2134                                         adev->gfx.config.gb_addr_config,
2135                                         GB_ADDR_CONFIG,
2136                                         NUM_RB_PER_SE);
2137         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2138                         REG_GET_FIELD(
2139                                         adev->gfx.config.gb_addr_config,
2140                                         GB_ADDR_CONFIG,
2141                                         NUM_SHADER_ENGINES);
2142         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2143                         REG_GET_FIELD(
2144                                         adev->gfx.config.gb_addr_config,
2145                                         GB_ADDR_CONFIG,
2146                                         PIPE_INTERLEAVE_SIZE));
2147
2148         return 0;
2149 }
2150
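/*
 * Initialize one compute ring: map it onto a MEC/pipe/queue (MEC0 is ME1
 * from the CP's point of view), assign its doorbell and its EOP slice in
 * the shared HPD buffer, and hook up the EOP interrupt for its pipe.
 */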
2151 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2152                                       int mec, int pipe, int queue)
2153 {
2154         unsigned irq_type;
2155         struct amdgpu_ring *ring;
2156         unsigned int hw_prio;
2157
2158         ring = &adev->gfx.compute_ring[ring_id];
2159
2160         /* mec0 is me1 */
2161         ring->me = mec + 1;
2162         ring->pipe = pipe;
2163         ring->queue = queue;
2164
2165         ring->ring_obj = NULL;
2166         ring->use_doorbell = true;
2167         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2168         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2169                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2170         ring->vm_hub = AMDGPU_GFXHUB(0);
2171         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2172
2173         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2174                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2175                 + ring->pipe;
2176         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2177                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2178         /* type-2 packets are deprecated on MEC, use type-3 instead */
2179         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2180                                 hw_prio, NULL);
2181 }
2182
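/*
 * Allocate storage for GFX IP register dumps: one copy of the core GC
 * register list plus a copy of the compute-queue register list for every
 * MEC/pipe/queue instance.
 */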
2183 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2184 {
2185         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2186         uint32_t *ptr;
2187         uint32_t inst;
2188
2189         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2190         if (!ptr) {
2191                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2192                 adev->gfx.ip_dump_core = NULL;
2193         } else {
2194                 adev->gfx.ip_dump_core = ptr;
2195         }
2196
2197         /* Allocate memory for compute queue registers for all the instances */
2198         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2199         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2200                 adev->gfx.mec.num_queue_per_pipe;
2201
2202         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2203         if (!ptr) {
2204                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2205                 adev->gfx.ip_dump_compute_queues = NULL;
2206         } else {
2207                 adev->gfx.ip_dump_compute_queues = ptr;
2208         }
2209 }
2210
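/*
 * Software-side init of the GFX9 IP block: determine the MEC/pipe/queue
 * topology, register the CP interrupt sources, create the RLC, MEC and
 * KIQ BOs, and initialize the gfx, software (mux) and compute rings.
 */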
2211 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2212 {
2213         int i, j, k, r, ring_id;
2214         int xcc_id = 0;
2215         struct amdgpu_ring *ring;
2216         struct amdgpu_device *adev = ip_block->adev;
2217         unsigned int hw_prio;
2218
2219         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2220         case IP_VERSION(9, 0, 1):
2221         case IP_VERSION(9, 2, 1):
2222         case IP_VERSION(9, 4, 0):
2223         case IP_VERSION(9, 2, 2):
2224         case IP_VERSION(9, 1, 0):
2225         case IP_VERSION(9, 4, 1):
2226         case IP_VERSION(9, 3, 0):
2227         case IP_VERSION(9, 4, 2):
2228                 adev->gfx.mec.num_mec = 2;
2229                 break;
2230         default:
2231                 adev->gfx.mec.num_mec = 1;
2232                 break;
2233         }
2234
2235         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2236         case IP_VERSION(9, 4, 2):
2237                 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2238                 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2239                 if (adev->gfx.mec_fw_version >= 88) {
2240                         adev->gfx.enable_cleaner_shader = true;
2241                         r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2242                         if (r) {
2243                                 adev->gfx.enable_cleaner_shader = false;
2244                                 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2245                         }
2246                 }
2247                 break;
2248         default:
2249                 adev->gfx.enable_cleaner_shader = false;
2250                 break;
2251         }
2252
2253         adev->gfx.mec.num_pipe_per_mec = 4;
2254         adev->gfx.mec.num_queue_per_pipe = 8;
2255
2256         /* EOP Event */
2257         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2258         if (r)
2259                 return r;
2260
2261         /* Bad opcode Event */
2262         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2263                               GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2264                               &adev->gfx.bad_op_irq);
2265         if (r)
2266                 return r;
2267
2268         /* Privileged reg */
2269         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2270                               &adev->gfx.priv_reg_irq);
2271         if (r)
2272                 return r;
2273
2274         /* Privileged inst */
2275         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2276                               &adev->gfx.priv_inst_irq);
2277         if (r)
2278                 return r;
2279
2280         /* ECC error */
2281         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2282                               &adev->gfx.cp_ecc_error_irq);
2283         if (r)
2284                 return r;
2285
2286         /* FUE error */
2287         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2288                               &adev->gfx.cp_ecc_error_irq);
2289         if (r)
2290                 return r;
2291
2292         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2293
2294         if (adev->gfx.rlc.funcs) {
2295                 if (adev->gfx.rlc.funcs->init) {
2296                         r = adev->gfx.rlc.funcs->init(adev);
2297                         if (r) {
2298                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2299                                 return r;
2300                         }
2301                 }
2302         }
2303
2304         r = gfx_v9_0_mec_init(adev);
2305         if (r) {
2306                 DRM_ERROR("Failed to init MEC BOs!\n");
2307                 return r;
2308         }
2309
2310         /* set up the gfx ring */
2311         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2312                 ring = &adev->gfx.gfx_ring[i];
2313                 ring->ring_obj = NULL;
2314                 if (!i)
2315                         sprintf(ring->name, "gfx");
2316                 else
2317                         sprintf(ring->name, "gfx_%d", i);
2318                 ring->use_doorbell = true;
2319                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2320
2321                 /* disable scheduler on the real ring */
2322                 ring->no_scheduler = adev->gfx.mcbp;
2323                 ring->vm_hub = AMDGPU_GFXHUB(0);
2324                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2325                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2326                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2327                 if (r)
2328                         return r;
2329         }
2330
2331         /* set up the software rings */
2332         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2333                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2334                         ring = &adev->gfx.sw_gfx_ring[i];
2335                         ring->ring_obj = NULL;
2336                         sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2337                         ring->use_doorbell = true;
2338                         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2339                         ring->is_sw_ring = true;
2340                         hw_prio = amdgpu_sw_ring_priority(i);
2341                         ring->vm_hub = AMDGPU_GFXHUB(0);
2342                         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2343                                              AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2344                                              NULL);
2345                         if (r)
2346                                 return r;
2347                         ring->wptr = 0;
2348                 }
2349
2350                 /* init the muxer and add software rings */
2351                 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2352                                          GFX9_NUM_SW_GFX_RINGS);
2353                 if (r) {
2354                         DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2355                         return r;
2356                 }
2357                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2358                         r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2359                                                         &adev->gfx.sw_gfx_ring[i]);
2360                         if (r) {
2361                                 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2362                                 return r;
2363                         }
2364                 }
2365         }
2366
2367         /* set up the compute queues - allocate horizontally across pipes */
2368         ring_id = 0;
2369         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2370                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2371                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2372                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2373                                                                      k, j))
2374                                         continue;
2375
2376                                 r = gfx_v9_0_compute_ring_init(adev,
2377                                                                ring_id,
2378                                                                i, k, j);
2379                                 if (r)
2380                                         return r;
2381
2382                                 ring_id++;
2383                         }
2384                 }
2385         }
2386
2387         /* TODO: Add queue reset mask when FW fully supports it */
2388         adev->gfx.gfx_supported_reset =
2389                 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2390         adev->gfx.compute_supported_reset =
2391                 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2392
2393         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2394         if (r) {
2395                 DRM_ERROR("Failed to init KIQ BOs!\n");
2396                 return r;
2397         }
2398
2399         r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2400         if (r)
2401                 return r;
2402
2403         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2404         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2405         if (r)
2406                 return r;
2407
2408         adev->gfx.ce_ram_size = 0x8000;
2409
2410         r = gfx_v9_0_gpu_early_init(adev);
2411         if (r)
2412                 return r;
2413
2414         if (amdgpu_gfx_ras_sw_init(adev)) {
2415                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2416                 return -EINVAL;
2417         }
2418
2419         gfx_v9_0_alloc_ip_dump(adev);
2420
2421         r = amdgpu_gfx_sysfs_init(adev);
2422         if (r)
2423                 return r;
2424
2425         return 0;
2426 }
2427
2428
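/*
 * Tear down everything created in gfx_v9_0_sw_init: software and
 * hardware rings, the ring mux, MQDs, KIQ, cleaner shader, MEC and RLC
 * buffers, microcode and the IP dump buffers.
 */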
2429 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2430 {
2431         int i;
2432         struct amdgpu_device *adev = ip_block->adev;
2433
2434         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2435                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2436                         amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2437                 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2438         }
2439
2440         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2441                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2442         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2443                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2444
2445         amdgpu_gfx_mqd_sw_fini(adev, 0);
2446         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2447         amdgpu_gfx_kiq_fini(adev, 0);
2448
2449         amdgpu_gfx_cleaner_shader_sw_fini(adev);
2450
2451         gfx_v9_0_mec_fini(adev);
2452         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2453                                 &adev->gfx.rlc.clear_state_gpu_addr,
2454                                 (void **)&adev->gfx.rlc.cs_ptr);
2455         if (adev->flags & AMD_IS_APU) {
2456                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2457                                 &adev->gfx.rlc.cp_table_gpu_addr,
2458                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2459         }
2460         gfx_v9_0_free_microcode(adev);
2461
2462         amdgpu_gfx_sysfs_fini(adev);
2463
2464         kfree(adev->gfx.ip_dump_core);
2465         kfree(adev->gfx.ip_dump_compute_queues);
2466
2467         return 0;
2468 }
2469
2470
2471 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2472 {
2473         /* TODO */
2474 }
2475
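/*
 * Program GRBM_GFX_INDEX to select a specific shader engine, shader
 * array and instance (or broadcast when 0xffffffff is passed) for
 * subsequent per-SE/SH register accesses.
 */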
2476 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2477                            u32 instance, int xcc_id)
2478 {
2479         u32 data;
2480
2481         if (instance == 0xffffffff)
2482                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2483         else
2484                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2485
2486         if (se_num == 0xffffffff)
2487                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2488         else
2489                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2490
2491         if (sh_num == 0xffffffff)
2492                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2493         else
2494                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2495
2496         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2497 }
2498
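/*
 * Return the bitmap of render backends that are not disabled by either
 * CC_RB_BACKEND_DISABLE or GC_USER_RB_BACKEND_DISABLE for the currently
 * selected SE/SH.
 */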
2499 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2500 {
2501         u32 data, mask;
2502
2503         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2504         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2505
2506         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2507         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2508
2509         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2510                                          adev->gfx.config.max_sh_per_se);
2511
2512         return (~data) & mask;
2513 }
2514
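/*
 * Walk all shader engines and shader arrays, collect the active render
 * backend bitmap for each, and cache the combined enable mask and RB
 * count in the gfx config.
 */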
2515 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2516 {
2517         int i, j;
2518         u32 data;
2519         u32 active_rbs = 0;
2520         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2521                                         adev->gfx.config.max_sh_per_se;
2522
2523         mutex_lock(&adev->grbm_idx_mutex);
2524         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2525                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2526                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2527                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2528                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2529                                                rb_bitmap_width_per_sh);
2530                 }
2531         }
2532         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2533         mutex_unlock(&adev->grbm_idx_mutex);
2534
2535         adev->gfx.config.backend_enable_mask = active_rbs;
2536         adev->gfx.config.num_rbs = hweight32(active_rbs);
2537 }
2538
2539 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2540                                 uint32_t first_vmid,
2541                                 uint32_t last_vmid)
2542 {
2543         uint32_t data;
2544         uint32_t trap_config_vmid_mask = 0;
2545         int i;
2546
2547         /* Calculate trap config vmid mask */
2548         for (i = first_vmid; i < last_vmid; i++)
2549                 trap_config_vmid_mask |= (1 << i);
2550
2551         data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2552                         VMID_SEL, trap_config_vmid_mask);
2553         data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2554                         TRAP_EN, 1);
2555         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2556         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2557
2558         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2559         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2560 }
2561
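/*
 * Program the SH_MEM aperture configuration for the compute (KFD) VMIDs
 * and clear their GDS, GWS and OA allocations; FW enables those
 * resources for the target VMIDs as needed.
 */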
2562 #define DEFAULT_SH_MEM_BASES    (0x6000)
2563 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2564 {
2565         int i;
2566         uint32_t sh_mem_config;
2567         uint32_t sh_mem_bases;
2568
2569         /*
2570          * Configure apertures:
2571          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2572          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2573          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2574          */
2575         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2576
2577         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2578                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2579                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2580
2581         mutex_lock(&adev->srbm_mutex);
2582         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2583                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2584                 /* CP and shaders */
2585                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2586                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2587         }
2588         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2589         mutex_unlock(&adev->srbm_mutex);
2590
2591         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2592            access. These should be enabled by FW for target VMIDs. */
2593         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2594                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2595                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2596                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2597                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2598         }
2599 }
2600
2601 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2602 {
2603         int vmid;
2604
2605         /*
2606          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2607          * access. Compute VMIDs should be enabled by FW for target VMIDs;
2608          * the driver can enable them for graphics. VMID0 should maintain
2609          * access so that HWS firmware can save/restore entries.
2610          */
2611         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2612                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2613                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2614                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2615                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2616         }
2617 }
2618
2619 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2620 {
2621         uint32_t tmp;
2622
2623         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2624         case IP_VERSION(9, 4, 1):
2625                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2626                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2627                                 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2628                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2629                 break;
2630         default:
2631                 break;
2632         }
2633 }
2634
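/*
 * Program the static GFX constants at init time: GRBM read timeout, RB
 * setup, CU info, the per-VMID SH_MEM apertures, and the compute/GDS
 * VMID and SQ defaults.
 */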
2635 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2636 {
2637         u32 tmp;
2638         int i;
2639
2640         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2641
2642         gfx_v9_0_tiling_mode_table_init(adev);
2643
2644         if (adev->gfx.num_gfx_rings)
2645                 gfx_v9_0_setup_rb(adev);
2646         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2647         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2648
2649         /* XXX SH_MEM regs */
2650         /* where to put LDS, scratch, GPUVM in FSA64 space */
2651         mutex_lock(&adev->srbm_mutex);
2652         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2653                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2654                 /* CP and shaders */
2655                 if (i == 0) {
2656                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2657                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2658                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2659                                             !!adev->gmc.noretry);
2660                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2661                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2662                 } else {
2663                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2664                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2665                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2666                                             !!adev->gmc.noretry);
2667                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2668                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2669                                 (adev->gmc.private_aperture_start >> 48));
2670                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2671                                 (adev->gmc.shared_aperture_start >> 48));
2672                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2673                 }
2674         }
2675         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2676
2677         mutex_unlock(&adev->srbm_mutex);
2678
2679         gfx_v9_0_init_compute_vmid(adev);
2680         gfx_v9_0_init_gds_vmid(adev);
2681         gfx_v9_0_init_sq_config(adev);
2682 }
2683
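/*
 * Poll the RLC serdes busy status for every SE/SH, and then the non-CU
 * masters, until they go idle or the usec timeout expires.
 */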
2684 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2685 {
2686         u32 i, j, k;
2687         u32 mask;
2688
2689         mutex_lock(&adev->grbm_idx_mutex);
2690         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2691                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2692                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2693                         for (k = 0; k < adev->usec_timeout; k++) {
2694                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2695                                         break;
2696                                 udelay(1);
2697                         }
2698                         if (k == adev->usec_timeout) {
2699                                 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2700                                                       0xffffffff, 0xffffffff, 0);
2701                                 mutex_unlock(&adev->grbm_idx_mutex);
2702                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2703                                          i, j);
2704                                 return;
2705                         }
2706                 }
2707         }
2708         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2709         mutex_unlock(&adev->grbm_idx_mutex);
2710
2711         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2712                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2713                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2714                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2715         for (k = 0; k < adev->usec_timeout; k++) {
2716                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2717                         break;
2718                 udelay(1);
2719         }
2720 }
2721
2722 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2723                                                bool enable)
2724 {
2725         u32 tmp;
2726
2727         /* These interrupts should be enabled to drive DS clock */
2728
2729         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2730
2731         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2732         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2733         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2734         if (adev->gfx.num_gfx_rings)
2735                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2736
2737         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2738 }
2739
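/*
 * Build the clear-state buffer contents and point the RLC at its GPU
 * address and size via the CSIB registers.
 */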
2740 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2741 {
2742         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2743         /* csib */
2744         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2745                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2746         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2747                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2748         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2749                         adev->gfx.rlc.clear_state_size);
2750 }
2751
2752 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2753                                 int indirect_offset,
2754                                 int list_size,
2755                                 int *unique_indirect_regs,
2756                                 int unique_indirect_reg_count,
2757                                 int *indirect_start_offsets,
2758                                 int *indirect_start_offsets_count,
2759                                 int max_start_offsets_count)
2760 {
2761         int idx;
2762
2763         for (; indirect_offset < list_size; indirect_offset++) {
2764                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2765                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2766                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2767
2768                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2769                         indirect_offset += 2;
2770
2771                         /* look for the matching index */
2772                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2773                                 if (unique_indirect_regs[idx] ==
2774                                         register_list_format[indirect_offset] ||
2775                                         !unique_indirect_regs[idx])
2776                                         break;
2777                         }
2778
2779                         BUG_ON(idx >= unique_indirect_reg_count);
2780
2781                         if (!unique_indirect_regs[idx])
2782                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2783
2784                         indirect_offset++;
2785                 }
2786         }
2787 }
2788
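/*
 * Program the RLC save/restore list: upload the direct register restore
 * table to ARAM, write the indirect register list and its start offsets
 * to RLC scratch RAM, and set up the unique indirect register
 * index/data controls.
 */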
2789 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2790 {
2791         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2792         int unique_indirect_reg_count = 0;
2793
2794         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2795         int indirect_start_offsets_count = 0;
2796
2797         int list_size = 0;
2798         int i = 0, j = 0;
2799         u32 tmp = 0;
2800
2801         u32 *register_list_format =
2802                 kmemdup(adev->gfx.rlc.register_list_format,
2803                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2804         if (!register_list_format)
2805                 return -ENOMEM;
2806
2807         /* setup unique_indirect_regs array and indirect_start_offsets array */
2808         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2809         gfx_v9_1_parse_ind_reg_list(register_list_format,
2810                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2811                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2812                                     unique_indirect_regs,
2813                                     unique_indirect_reg_count,
2814                                     indirect_start_offsets,
2815                                     &indirect_start_offsets_count,
2816                                     ARRAY_SIZE(indirect_start_offsets));
2817
2818         /* enable auto inc in case it is disabled */
2819         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2820         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2821         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2822
2823         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2824         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2825                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2826         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2827                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2828                         adev->gfx.rlc.register_restore[i]);
2829
2830         /* load indirect register */
2831         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2832                 adev->gfx.rlc.reg_list_format_start);
2833
2834         /* direct register portion */
2835         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2836                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2837                         register_list_format[i]);
2838
2839         /* indirect register portion */
2840         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2841                 if (register_list_format[i] == 0xFFFFFFFF) {
2842                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2843                         continue;
2844                 }
2845
2846                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2847                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2848
2849                 for (j = 0; j < unique_indirect_reg_count; j++) {
2850                         if (register_list_format[i] == unique_indirect_regs[j]) {
2851                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2852                                 break;
2853                         }
2854                 }
2855
2856                 BUG_ON(j >= unique_indirect_reg_count);
2857
2858                 i++;
2859         }
2860
2861         /* set save/restore list size */
2862         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2863         list_size = list_size >> 1;
2864         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2865                 adev->gfx.rlc.reg_restore_list_size);
2866         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2867
2868         /* write the starting offsets to RLC scratch ram */
2869         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2870                 adev->gfx.rlc.starting_offsets_start);
2871         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2872                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2873                        indirect_start_offsets[i]);
2874
2875         /* load unique indirect regs */
2876         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2877                 if (unique_indirect_regs[i] != 0) {
2878                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2879                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2880                                unique_indirect_regs[i] & 0x3FFFF);
2881
2882                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2883                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2884                                unique_indirect_regs[i] >> 20);
2885                 }
2886         }
2887
2888         kfree(register_list_format);
2889         return 0;
2890 }
2891
2892 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2893 {
2894         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2895 }
2896
2897 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2898                                              bool enable)
2899 {
2900         uint32_t data = 0;
2901         uint32_t default_data = 0;
2902
2903         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2904         if (enable) {
2905                 /* enable GFXIP control over CGPG */
2906                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2907                 if (default_data != data)
2908                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2909
2910                 /* update status */
2911                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2912                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2913                 if (default_data != data)
2914                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2915         } else {
2916                 /* restore GFXIP control over CGPG */
2917                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2918                 if (default_data != data)
2919                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2920         }
2921 }
2922
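/*
 * If any GFX powergating feature is enabled, program the idle poll
 * count, the RLC powergating delays and the auto-PG idle threshold, and
 * hand CGPG control to GFXIP where applicable.
 */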
2923 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2924 {
2925         uint32_t data = 0;
2926
2927         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2928                               AMD_PG_SUPPORT_GFX_SMG |
2929                               AMD_PG_SUPPORT_GFX_DMG)) {
2930                 /* init IDLE_POLL_COUNT = 60 */
2931                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2932                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2933                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2934                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2935
2936                 /* init RLC PG Delay */
2937                 data = 0;
2938                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2939                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2940                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2941                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2942                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2943
2944                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2945                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2946                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2947                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2948
2949                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2950                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2951                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2952                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2953
2954                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2955                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2956
2957                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2958                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2959                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2960                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2961                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2962         }
2963 }
2964
2965 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2966                                                 bool enable)
2967 {
2968         uint32_t data = 0;
2969         uint32_t default_data = 0;
2970
2971         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2972         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2973                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2974                              enable ? 1 : 0);
2975         if (default_data != data)
2976                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2977 }
2978
2979 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2980                                                 bool enable)
2981 {
2982         uint32_t data = 0;
2983         uint32_t default_data = 0;
2984
2985         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2986         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2987                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2988                              enable ? 1 : 0);
2989         if (default_data != data)
2990                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2991 }
2992
2993 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2994                                         bool enable)
2995 {
2996         uint32_t data = 0;
2997         uint32_t default_data = 0;
2998
2999         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3000         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3001                              CP_PG_DISABLE,
3002                              enable ? 0 : 1);
3003         if (default_data != data)
3004                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3005 }
3006
3007 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
3008                                                 bool enable)
3009 {
3010         uint32_t data, default_data;
3011
3012         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3013         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3014                              GFX_POWER_GATING_ENABLE,
3015                              enable ? 1 : 0);
3016         if (default_data != data)
3017                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3018 }
3019
3020 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3021                                                 bool enable)
3022 {
3023         uint32_t data, default_data;
3024
3025         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3026         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3027                              GFX_PIPELINE_PG_ENABLE,
3028                              enable ? 1 : 0);
3029         if (default_data != data)
3030                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3031
3032         if (!enable)
3033                 /* read any GFX register to wake up GFX */
3034                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3035 }
3036
3037 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3038                                                        bool enable)
3039 {
3040         uint32_t data, default_data;
3041
3042         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3043         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3044                              STATIC_PER_CU_PG_ENABLE,
3045                              enable ? 1 : 0);
3046         if (default_data != data)
3047                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3048 }
3049
3050 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3051                                                 bool enable)
3052 {
3053         uint32_t data, default_data;
3054
3055         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3056         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3057                              DYN_PER_CU_PG_ENABLE,
3058                              enable ? 1 : 0);
3059         if (default_data != data)
3060                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3061 }
3062
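/*
 * Powergating init: set up the clear-state buffer, the RLC save/restore
 * machinery (needed by gfxoff on RLC v2_1) and, when any PG feature is
 * enabled, the CP jump table address and the GFX powergating registers.
 */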
3063 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3064 {
3065         gfx_v9_0_init_csb(adev);
3066
3067         /*
3068          * The RLC save/restore list is usable since v2_1,
3069          * and it is needed by the gfxoff feature.
3070          */
3071         if (adev->gfx.rlc.is_rlc_v2_1) {
3072                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3073                             IP_VERSION(9, 2, 1) ||
3074                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3075                         gfx_v9_1_init_rlc_save_restore_list(adev);
3076                 gfx_v9_0_enable_save_restore_machine(adev);
3077         }
3078
3079         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3080                               AMD_PG_SUPPORT_GFX_SMG |
3081                               AMD_PG_SUPPORT_GFX_DMG |
3082                               AMD_PG_SUPPORT_CP |
3083                               AMD_PG_SUPPORT_GDS |
3084                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3085                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3086                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3087                 gfx_v9_0_init_gfx_power_gating(adev);
3088         }
3089 }
3090
3091 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3092 {
3093         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3094         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3095         gfx_v9_0_wait_for_rlc_serdes(adev);
3096 }
3097
3098 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3099 {
3100         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3101         udelay(50);
3102         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3103         udelay(50);
3104 }
3105
3106 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3107 {
3108 #ifdef AMDGPU_RLC_DEBUG_RETRY
3109         u32 rlc_ucode_ver;
3110 #endif
3111
3112         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3113         udelay(50);
3114
3115         /* carrizo: enable the cp interrupt only after cp is initialized */
3116         if (!(adev->flags & AMD_IS_APU)) {
3117                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3118                 udelay(50);
3119         }
3120
3121 #ifdef AMDGPU_RLC_DEBUG_RETRY
3122         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3123         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3124         if (rlc_ucode_ver == 0x108) {
3125                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3126                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3127                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3128                  * default is 0x9C4 to create a 100us interval */
3129                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3130                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3131                  * to disable the page fault retry interrupts, default is
3132                  * 0x100 (256) */
3133                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3134         }
3135 #endif
3136 }
3137
3138 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3139 {
3140         const struct rlc_firmware_header_v2_0 *hdr;
3141         const __le32 *fw_data;
3142         unsigned i, fw_size;
3143
3144         if (!adev->gfx.rlc_fw)
3145                 return -EINVAL;
3146
3147         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3148         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3149
3150         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3151                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3152         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3153
3154         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3155                         RLCG_UCODE_LOADING_START_ADDRESS);
3156         for (i = 0; i < fw_size; i++)
3157                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3158         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3159
3160         return 0;
3161 }
3162
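/*
 * Bring the RLC up: stop it, disable CGCG, run powergating init, load
 * the RLC microcode when PSP loading is not used, apply the LBPW
 * setting where supported, and start the RLC again.
 */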
3163 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3164 {
3165         int r;
3166
3167         if (amdgpu_sriov_vf(adev)) {
3168                 gfx_v9_0_init_csb(adev);
3169                 return 0;
3170         }
3171
3172         adev->gfx.rlc.funcs->stop(adev);
3173
3174         /* disable CG */
3175         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3176
3177         gfx_v9_0_init_pg(adev);
3178
3179         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3180                 /* legacy rlc firmware loading */
3181                 r = gfx_v9_0_rlc_load_microcode(adev);
3182                 if (r)
3183                         return r;
3184         }
3185
3186         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3187         case IP_VERSION(9, 2, 2):
3188         case IP_VERSION(9, 1, 0):
3189                 gfx_v9_0_init_lbpw(adev);
3190                 if (amdgpu_lbpw == 0)
3191                         gfx_v9_0_enable_lbpw(adev, false);
3192                 else
3193                         gfx_v9_0_enable_lbpw(adev, true);
3194                 break;
3195         case IP_VERSION(9, 4, 0):
3196                 gfx_v9_4_init_lbpw(adev);
3197                 if (amdgpu_lbpw > 0)
3198                         gfx_v9_0_enable_lbpw(adev, true);
3199                 else
3200                         gfx_v9_0_enable_lbpw(adev, false);
3201                 break;
3202         default:
3203                 break;
3204         }
3205
3206         gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3207
3208         adev->gfx.rlc.funcs->start(adev);
3209
3210         return 0;
3211 }
3212
3213 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3214 {
3215         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3216
3217         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3218         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3219         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3220         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3221         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3222         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3223         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3224         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3225         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3226         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3227         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3228         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3229         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3230         udelay(50);
3231 }
3232
3233 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3234 {
3235         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3236         const struct gfx_firmware_header_v1_0 *ce_hdr;
3237         const struct gfx_firmware_header_v1_0 *me_hdr;
3238         const __le32 *fw_data;
3239         unsigned i, fw_size;
3240
3241         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3242                 return -EINVAL;
3243
3244         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3245                 adev->gfx.pfp_fw->data;
3246         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3247                 adev->gfx.ce_fw->data;
3248         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3249                 adev->gfx.me_fw->data;
3250
3251         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3252         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3253         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3254
3255         gfx_v9_0_cp_gfx_enable(adev, false);
3256
3257         /* PFP */
3258         fw_data = (const __le32 *)
3259                 (adev->gfx.pfp_fw->data +
3260                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3261         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3262         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3263         for (i = 0; i < fw_size; i++)
3264                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3265         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3266
3267         /* CE */
3268         fw_data = (const __le32 *)
3269                 (adev->gfx.ce_fw->data +
3270                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3271         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3272         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3273         for (i = 0; i < fw_size; i++)
3274                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3275         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3276
3277         /* ME */
3278         fw_data = (const __le32 *)
3279                 (adev->gfx.me_fw->data +
3280                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3281         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3282         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3283         for (i = 0; i < fw_size; i++)
3284                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3285         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3286
3287         return 0;
3288 }
3289
3290 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3291 {
3292         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3293         const struct cs_section_def *sect = NULL;
3294         const struct cs_extent_def *ext = NULL;
3295         int r, i, tmp;
3296
3297         /* init the CP */
3298         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3299         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3300
3301         gfx_v9_0_cp_gfx_enable(adev, true);
3302
3303         /* For now, only apply this quirk to the gfx9 APU series; it is
3304          * already confirmed that the gfx10/gfx11 APUs do not need this update.
3305          */
3306         if (adev->flags & AMD_IS_APU &&
3307                         adev->in_s3 && !pm_resume_via_firmware()) {
3308                 DRM_INFO("Will skip the CSB packet resubmit\n");
3309                 return 0;
3310         }
3311         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3312         if (r) {
3313                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3314                 return r;
3315         }
3316
3317         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3318         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3319
3320         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3321         amdgpu_ring_write(ring, 0x80000000);
3322         amdgpu_ring_write(ring, 0x80000000);
3323
3324         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3325                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3326                         if (sect->id == SECT_CONTEXT) {
3327                                 amdgpu_ring_write(ring,
3328                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3329                                                ext->reg_count));
3330                                 amdgpu_ring_write(ring,
3331                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3332                                 for (i = 0; i < ext->reg_count; i++)
3333                                         amdgpu_ring_write(ring, ext->extent[i]);
3334                         }
3335                 }
3336         }
3337
3338         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3339         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3340
3341         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3342         amdgpu_ring_write(ring, 0);
3343
3344         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3345         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3346         amdgpu_ring_write(ring, 0x8000);
3347         amdgpu_ring_write(ring, 0x8000);
3348
3349         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3350         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3351                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3352         amdgpu_ring_write(ring, tmp);
3353         amdgpu_ring_write(ring, 0);
3354
3355         amdgpu_ring_commit(ring);
3356
3357         return 0;
3358 }
3359
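/*
 * Configure the gfx ring 0 hardware state: ring buffer size and base
 * address, rptr/wptr writeback and poll addresses, and the doorbell
 * range, then kick off the ring via gfx_v9_0_cp_gfx_start().
 */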
3360 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3361 {
3362         struct amdgpu_ring *ring;
3363         u32 tmp;
3364         u32 rb_bufsz;
3365         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3366
3367         /* Set the write pointer delay */
3368         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3369
3370         /* set the RB to use vmid 0 */
3371         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3372
3373         /* Set ring buffer size */
3374         ring = &adev->gfx.gfx_ring[0];
3375         rb_bufsz = order_base_2(ring->ring_size / 8);
3376         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3377         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3378 #ifdef __BIG_ENDIAN
3379         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3380 #endif
3381         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3382
3383         /* Initialize the ring buffer's write pointers */
3384         ring->wptr = 0;
3385         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3386         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3387
3388         /* set the wb address whether it's enabled or not */
3389         rptr_addr = ring->rptr_gpu_addr;
3390         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3391         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3392
3393         wptr_gpu_addr = ring->wptr_gpu_addr;
3394         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3395         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3396
3397         mdelay(1);
3398         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3399
3400         rb_addr = ring->gpu_addr >> 8;
3401         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3402         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3403
3404         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3405         if (ring->use_doorbell) {
3406                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3407                                     DOORBELL_OFFSET, ring->doorbell_index);
3408                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3409                                     DOORBELL_EN, 1);
3410         } else {
3411                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3412         }
3413         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3414
3415         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3416                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3417         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3418
3419         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3420                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3421
3422
3423         /* start the ring */
3424         gfx_v9_0_cp_gfx_start(adev);
3425
3426         return 0;
3427 }
3428
3429 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3430 {
3431         if (enable) {
3432                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3433         } else {
3434                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3435                                  (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3436                                   CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3437                                   CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3438                                   CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3439                                   CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3440                                   CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3441                                   CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3442                                   CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3443                                   CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3444                 adev->gfx.kiq[0].ring.sched.ready = false;
3445         }
3446         udelay(50);
3447 }
3448
3449 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3450 {
3451         const struct gfx_firmware_header_v1_0 *mec_hdr;
3452         const __le32 *fw_data;
3453         unsigned i;
3454         u32 tmp;
3455
3456         if (!adev->gfx.mec_fw)
3457                 return -EINVAL;
3458
3459         gfx_v9_0_cp_compute_enable(adev, false);
3460
3461         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3462         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3463
3464         fw_data = (const __le32 *)
3465                 (adev->gfx.mec_fw->data +
3466                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3467         tmp = 0;
3468         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3469         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3470         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3471
3472         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3473                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3474         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3475                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3476
3477         /* MEC1 */
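             /* program the MEC1 jump table: set the start offset once, then stream
              * the table entries through the DATA register
              */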
3478         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3479                          mec_hdr->jt_offset);
3480         for (i = 0; i < mec_hdr->jt_size; i++)
3481                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3482                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3483
3484         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3485                         adev->gfx.mec_fw_version);
3486         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3487
3488         return 0;
3489 }
3490
3491 /* KIQ functions */
3492 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3493 {
3494         uint32_t tmp;
3495         struct amdgpu_device *adev = ring->adev;
3496
3497         /* tell the RLC which queue is the KIQ */
3498         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3499         tmp &= 0xffffff00;
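             /* pack the KIQ ring's me/pipe/queue into the low byte of the scheduler entry */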
3500         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3501         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
3502 }
3503
3504 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3505 {
3506         struct amdgpu_device *adev = ring->adev;
3507
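             /* raise pipe and queue priority only for queues marked as high-priority compute */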
3508         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3509                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3510                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3511                         mqd->cp_hqd_queue_priority =
3512                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3513                 }
3514         }
3515 }
3516
3517 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3518 {
3519         struct amdgpu_device *adev = ring->adev;
3520         struct v9_mqd *mqd = ring->mqd_ptr;
3521         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3522         uint32_t tmp;
3523
3524         mqd->header = 0xC0310800;
3525         mqd->compute_pipelinestat_enable = 0x00000001;
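             /* enable all CUs on every shader engine for this queue by default */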
3526         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3527         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3528         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3529         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3530         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3531         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3532         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3533         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3534         mqd->compute_misc_reserved = 0x00000003;
3535
3536         mqd->dynamic_cu_mask_addr_lo =
3537                 lower_32_bits(ring->mqd_gpu_addr
3538                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3539         mqd->dynamic_cu_mask_addr_hi =
3540                 upper_32_bits(ring->mqd_gpu_addr
3541                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3542
3543         eop_base_addr = ring->eop_gpu_addr >> 8;
3544         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3545         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3546
3547         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3548         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3549         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3550                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3551
3552         mqd->cp_hqd_eop_control = tmp;
3553
3554         /* enable doorbell? */
3555         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3556
3557         if (ring->use_doorbell) {
3558                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3559                                     DOORBELL_OFFSET, ring->doorbell_index);
3560                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3561                                     DOORBELL_EN, 1);
3562                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3563                                     DOORBELL_SOURCE, 0);
3564                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3565                                     DOORBELL_HIT, 0);
3566         } else {
3567                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3568                                          DOORBELL_EN, 0);
3569         }
3570
3571         mqd->cp_hqd_pq_doorbell_control = tmp;
3572
3573         /* disable the queue if it's active */
3574         ring->wptr = 0;
3575         mqd->cp_hqd_dequeue_request = 0;
3576         mqd->cp_hqd_pq_rptr = 0;
3577         mqd->cp_hqd_pq_wptr_lo = 0;
3578         mqd->cp_hqd_pq_wptr_hi = 0;
3579
3580         /* set the pointer to the MQD */
3581         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3582         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3583
3584         /* set MQD vmid to 0 */
3585         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3586         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3587         mqd->cp_mqd_control = tmp;
3588
3589         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3590         hqd_gpu_addr = ring->gpu_addr >> 8;
3591         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3592         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3593
3594         /* set up the HQD, this is similar to CP_RB0_CNTL */
3595         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
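             /* queue size is encoded as log2(ring size in dwords) - 1 */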
3596         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3597                             (order_base_2(ring->ring_size / 4) - 1));
3598         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3599                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3600 #ifdef __BIG_ENDIAN
3601         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3602 #endif
3603         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3604         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3605         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3606         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3607         mqd->cp_hqd_pq_control = tmp;
3608
3609         /* set the wb address whether it's enabled or not */
3610         wb_gpu_addr = ring->rptr_gpu_addr;
3611         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3612         mqd->cp_hqd_pq_rptr_report_addr_hi =
3613                 upper_32_bits(wb_gpu_addr) & 0xffff;
3614
3615         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3616         wb_gpu_addr = ring->wptr_gpu_addr;
3617         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3618         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3619
3620         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3621         ring->wptr = 0;
3622         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3623
3624         /* set the vmid for the queue */
3625         mqd->cp_hqd_vmid = 0;
3626
3627         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3628         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3629         mqd->cp_hqd_persistent_state = tmp;
3630
3631         /* set MIN_IB_AVAIL_SIZE */
3632         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3633         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3634         mqd->cp_hqd_ib_control = tmp;
3635
3636         /* set static priority for a queue/ring */
3637         gfx_v9_0_mqd_set_priority(ring, mqd);
3638         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3639
3640         /* the map_queues packet doesn't need to activate the queue,
3641          * so only the KIQ needs to set this field.
3642          */
3643         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3644                 mqd->cp_hqd_active = 1;
3645
3646         return 0;
3647 }
3648
3649 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3650 {
3651         struct amdgpu_device *adev = ring->adev;
3652         struct v9_mqd *mqd = ring->mqd_ptr;
3653         int j;
3654
3655         /* disable wptr polling */
3656         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3657
3658         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3659                mqd->cp_hqd_eop_base_addr_lo);
3660         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3661                mqd->cp_hqd_eop_base_addr_hi);
3662
3663         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3664         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3665                mqd->cp_hqd_eop_control);
3666
3667         /* enable doorbell? */
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3669                mqd->cp_hqd_pq_doorbell_control);
3670
3671         /* disable the queue if it's active */
3672         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3673                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3674                 for (j = 0; j < adev->usec_timeout; j++) {
3675                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3676                                 break;
3677                         udelay(1);
3678                 }
3679                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3680                        mqd->cp_hqd_dequeue_request);
3681                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3682                        mqd->cp_hqd_pq_rptr);
3683                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3684                        mqd->cp_hqd_pq_wptr_lo);
3685                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3686                        mqd->cp_hqd_pq_wptr_hi);
3687         }
3688
3689         /* set the pointer to the MQD */
3690         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3691                mqd->cp_mqd_base_addr_lo);
3692         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3693                mqd->cp_mqd_base_addr_hi);
3694
3695         /* set MQD vmid to 0 */
3696         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3697                mqd->cp_mqd_control);
3698
3699         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3700         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3701                mqd->cp_hqd_pq_base_lo);
3702         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3703                mqd->cp_hqd_pq_base_hi);
3704
3705         /* set up the HQD, this is similar to CP_RB0_CNTL */
3706         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3707                mqd->cp_hqd_pq_control);
3708
3709         /* set the wb address whether it's enabled or not */
3710         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3711                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3712         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3713                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3714
3715         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3716         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3717                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3718         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3719                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3720
3721         /* enable the doorbell if requested */
3722         if (ring->use_doorbell) {
3723                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3724                                         (adev->doorbell_index.kiq * 2) << 2);
3725                 /* If GC has entered CGPG, ringing a doorbell beyond the first page
3726                  * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3727                  * work around this issue. This change has to stay aligned with the
3728                  * firmware update.
3729                  */
3730                 if (check_if_enlarge_doorbell_range(adev))
3731                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3732                                         (adev->doorbell.size - 4));
3733                 else
3734                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3735                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3736         }
3737
3738         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3739                mqd->cp_hqd_pq_doorbell_control);
3740
3741         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3742         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3743                mqd->cp_hqd_pq_wptr_lo);
3744         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3745                mqd->cp_hqd_pq_wptr_hi);
3746
3747         /* set the vmid for the queue */
3748         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3749
3750         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3751                mqd->cp_hqd_persistent_state);
3752
3753         /* activate the queue */
3754         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3755                mqd->cp_hqd_active);
3756
3757         if (ring->use_doorbell)
3758                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3759
3760         return 0;
3761 }
3762
3763 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3764 {
3765         struct amdgpu_device *adev = ring->adev;
3766         int j;
3767
3768         /* disable the queue if it's active */
3769         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3770
3771                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3772
3773                 for (j = 0; j < adev->usec_timeout; j++) {
3774                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3775                                 break;
3776                         udelay(1);
3777                 }
3778
3779                 if (j == adev->usec_timeout) {
3780                         DRM_DEBUG("KIQ dequeue request failed.\n");
3781
3782                         /* Manual disable if dequeue request times out */
3783                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3784                 }
3785
3786                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3787                       0);
3788         }
3789
3790         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3791         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3792         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3793         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3794         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3795         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3796         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3797         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3798
3799         return 0;
3800 }
3801
3802 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3803 {
3804         struct amdgpu_device *adev = ring->adev;
3805         struct v9_mqd *mqd = ring->mqd_ptr;
3806         struct v9_mqd *tmp_mqd;
3807
3808         gfx_v9_0_kiq_setting(ring);
3809
3810         /* The GPU could be in a bad state during probe: the driver may trigger
3811          * a reset after loading the SMU, in which case the MQD has not been
3812          * initialized and the driver needs to re-init it.
3813          * Check mqd->cp_hqd_pq_control since this value should not be 0.
3814          */
3815         tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3816         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3817                 /* for the GPU_RESET case, restore the MQD to a clean state */
3818                 if (adev->gfx.kiq[0].mqd_backup)
3819                         memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3820
3821                 /* reset ring buffer */
3822                 ring->wptr = 0;
3823                 amdgpu_ring_clear_ring(ring);
3824
3825                 mutex_lock(&adev->srbm_mutex);
3826                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3827                 gfx_v9_0_kiq_init_register(ring);
3828                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3829                 mutex_unlock(&adev->srbm_mutex);
3830         } else {
3831                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3832                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3833                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3834                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3835                         amdgpu_ring_clear_ring(ring);
3836                 mutex_lock(&adev->srbm_mutex);
3837                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3838                 gfx_v9_0_mqd_init(ring);
3839                 gfx_v9_0_kiq_init_register(ring);
3840                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3841                 mutex_unlock(&adev->srbm_mutex);
3842
3843                 if (adev->gfx.kiq[0].mqd_backup)
3844                         memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3845         }
3846
3847         return 0;
3848 }
3849
3850 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3851 {
3852         struct amdgpu_device *adev = ring->adev;
3853         struct v9_mqd *mqd = ring->mqd_ptr;
3854         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3855         struct v9_mqd *tmp_mqd;
3856
3857         /* Same as the KIQ init above: the driver needs to re-init the MQD if
3858          * mqd->cp_hqd_pq_control was not initialized before.
3859          */
3860         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3861
3862         if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3863             (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3864                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3865                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3866                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3867                 mutex_lock(&adev->srbm_mutex);
3868                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3869                 gfx_v9_0_mqd_init(ring);
3870                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3871                 mutex_unlock(&adev->srbm_mutex);
3872
3873                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3874                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3875         } else {
3876                 /* restore MQD to a clean status */
3877                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3878                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3879                 /* reset ring buffer */
3880                 ring->wptr = 0;
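                     /* also clear the wptr writeback slot that the CP can poll */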
3881                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3882                 amdgpu_ring_clear_ring(ring);
3883         }
3884
3885         return 0;
3886 }
3887
3888 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3889 {
3890         struct amdgpu_ring *ring;
3891         int r;
3892
3893         ring = &adev->gfx.kiq[0].ring;
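             /* the KIQ itself is brought up through direct register writes;
              * the compute queues are mapped through it afterwards
              */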
3894
3895         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3896         if (unlikely(r != 0))
3897                 return r;
3898
3899         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3900         if (unlikely(r != 0)) {
3901                 amdgpu_bo_unreserve(ring->mqd_obj);
3902                 return r;
3903         }
3904
3905         gfx_v9_0_kiq_init_queue(ring);
3906         amdgpu_bo_kunmap(ring->mqd_obj);
3907         ring->mqd_ptr = NULL;
3908         amdgpu_bo_unreserve(ring->mqd_obj);
3909         return 0;
3910 }
3911
3912 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3913 {
3914         struct amdgpu_ring *ring = NULL;
3915         int r = 0, i;
3916
3917         gfx_v9_0_cp_compute_enable(adev, true);
3918
3919         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3920                 ring = &adev->gfx.compute_ring[i];
3921
3922                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3923                 if (unlikely(r != 0))
3924                         goto done;
3925                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3926                 if (!r) {
3927                         r = gfx_v9_0_kcq_init_queue(ring, false);
3928                         amdgpu_bo_kunmap(ring->mqd_obj);
3929                         ring->mqd_ptr = NULL;
3930                 }
3931                 amdgpu_bo_unreserve(ring->mqd_obj);
3932                 if (r)
3933                         goto done;
3934         }
3935
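             /* map all initialized compute queues onto the hardware through the KIQ */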
3936         r = amdgpu_gfx_enable_kcq(adev, 0);
3937 done:
3938         return r;
3939 }
3940
3941 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3942 {
3943         int r, i;
3944         struct amdgpu_ring *ring;
3945
3946         if (!(adev->flags & AMD_IS_APU))
3947                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3948
3949         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3950                 if (adev->gfx.num_gfx_rings) {
3951                         /* legacy firmware loading */
3952                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3953                         if (r)
3954                                 return r;
3955                 }
3956
3957                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3958                 if (r)
3959                         return r;
3960         }
3961
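             /* halt both CP engines before reprogramming the rings and queues */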
3962         if (adev->gfx.num_gfx_rings)
3963                 gfx_v9_0_cp_gfx_enable(adev, false);
3964         gfx_v9_0_cp_compute_enable(adev, false);
3965
3966         r = gfx_v9_0_kiq_resume(adev);
3967         if (r)
3968                 return r;
3969
3970         if (adev->gfx.num_gfx_rings) {
3971                 r = gfx_v9_0_cp_gfx_resume(adev);
3972                 if (r)
3973                         return r;
3974         }
3975
3976         r = gfx_v9_0_kcq_resume(adev);
3977         if (r)
3978                 return r;
3979
3980         if (adev->gfx.num_gfx_rings) {
3981                 ring = &adev->gfx.gfx_ring[0];
3982                 r = amdgpu_ring_test_helper(ring);
3983                 if (r)
3984                         return r;
3985         }
3986
3987         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3988                 ring = &adev->gfx.compute_ring[i];
3989                 amdgpu_ring_test_helper(ring);
3990         }
3991
3992         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3993
3994         return 0;
3995 }
3996
3997 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3998 {
3999         u32 tmp;
4000
4001         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
4002             amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
4003                 return;
4004
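             /* mirror the DF memory hashing configuration into the TCP address config */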
4005         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
4006         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
4007                                 adev->df.hash_status.hash_64k);
4008         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4009                                 adev->df.hash_status.hash_2m);
4010         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4011                                 adev->df.hash_status.hash_1g);
4012         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4013 }
4014
4015 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4016 {
4017         if (adev->gfx.num_gfx_rings)
4018                 gfx_v9_0_cp_gfx_enable(adev, enable);
4019         gfx_v9_0_cp_compute_enable(adev, enable);
4020 }
4021
4022 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4023 {
4024         int r;
4025         struct amdgpu_device *adev = ip_block->adev;
4026
4027         amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4028                                        adev->gfx.cleaner_shader_ptr);
4029
4030         if (!amdgpu_sriov_vf(adev))
4031                 gfx_v9_0_init_golden_registers(adev);
4032
4033         gfx_v9_0_constants_init(adev);
4034
4035         gfx_v9_0_init_tcp_config(adev);
4036
4037         r = adev->gfx.rlc.funcs->resume(adev);
4038         if (r)
4039                 return r;
4040
4041         r = gfx_v9_0_cp_resume(adev);
4042         if (r)
4043                 return r;
4044
4045         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4046                 gfx_v9_4_2_set_power_brake_sequence(adev);
4047
4048         return r;
4049 }
4050
4051 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4052 {
4053         struct amdgpu_device *adev = ip_block->adev;
4054
4055         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4056                 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4057         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4058         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4059         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4060
4061         /* If a fatal RAS interrupt has triggered, DF freeze and KCQ disable will fail */
4062         if (!amdgpu_ras_intr_triggered())
4063                 /* disable KCQ to avoid the CPC touching memory that is no longer valid */
4064                 amdgpu_gfx_disable_kcq(adev, 0);
4065
4066         if (amdgpu_sriov_vf(adev)) {
4067                 gfx_v9_0_cp_gfx_enable(adev, false);
4068                 /* Polling must be disabled for SRIOV when the hw is finished, otherwise
4069                  * the CPC engine may keep fetching a WB address that is already
4070                  * invalid after the sw is finished, triggering a DMAR read error on
4071                  * the hypervisor side.
4072                  */
4073                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4074                 return 0;
4075         }
4076
4077         /* Use the deinitialize sequence from CAIL when unbinding the device from the
4078          * driver, otherwise the KIQ hangs when binding it back.
4079          */
4080         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4081                 mutex_lock(&adev->srbm_mutex);
4082                 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4083                                 adev->gfx.kiq[0].ring.pipe,
4084                                 adev->gfx.kiq[0].ring.queue, 0, 0);
4085                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4086                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4087                 mutex_unlock(&adev->srbm_mutex);
4088         }
4089
4090         gfx_v9_0_cp_enable(adev, false);
4091
4092         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4093         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4094             (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4095                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4096                 return 0;
4097         }
4098
4099         adev->gfx.rlc.funcs->stop(adev);
4100         return 0;
4101 }
4102
4103 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4104 {
4105         return gfx_v9_0_hw_fini(ip_block);
4106 }
4107
4108 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4109 {
4110         return gfx_v9_0_hw_init(ip_block);
4111 }
4112
4113 static bool gfx_v9_0_is_idle(void *handle)
4114 {
4115         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4116
4117         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4118                                 GRBM_STATUS, GUI_ACTIVE))
4119                 return false;
4120         else
4121                 return true;
4122 }
4123
4124 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4125 {
4126         unsigned i;
4127         struct amdgpu_device *adev = ip_block->adev;
4128
4129         for (i = 0; i < adev->usec_timeout; i++) {
4130                 if (gfx_v9_0_is_idle(adev))
4131                         return 0;
4132                 udelay(1);
4133         }
4134         return -ETIMEDOUT;
4135 }
4136
4137 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4138 {
4139         u32 grbm_soft_reset = 0;
4140         u32 tmp;
4141         struct amdgpu_device *adev = ip_block->adev;
4142
4143         /* GRBM_STATUS */
4144         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4145         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4146                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4147                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4148                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4149                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4150                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4151                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4152                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4153                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4154                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4155         }
4156
4157         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4158                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4159                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4160         }
4161
4162         /* GRBM_STATUS2 */
4163         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4164         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4165                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4166                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4167
4168
4169         if (grbm_soft_reset) {
4170                 /* stop the rlc */
4171                 adev->gfx.rlc.funcs->stop(adev);
4172
4173                 if (adev->gfx.num_gfx_rings)
4174                         /* Disable GFX parsing/prefetching */
4175                         gfx_v9_0_cp_gfx_enable(adev, false);
4176
4177                 /* Disable MEC parsing/prefetching */
4178                 gfx_v9_0_cp_compute_enable(adev, false);
4179
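                     /* assert the requested soft reset bits, wait, then de-assert them */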
4180                 if (grbm_soft_reset) {
4181                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4182                         tmp |= grbm_soft_reset;
4183                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4184                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4185                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4186
4187                         udelay(50);
4188
4189                         tmp &= ~grbm_soft_reset;
4190                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4191                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4192                 }
4193
4194                 /* Wait a little for things to settle down */
4195                 udelay(50);
4196         }
4197         return 0;
4198 }
4199
4200 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4201 {
4202         signed long r, cnt = 0;
4203         unsigned long flags;
4204         uint32_t seq, reg_val_offs = 0;
4205         uint64_t value = 0;
4206         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4207         struct amdgpu_ring *ring = &kiq->ring;
4208
4209         BUG_ON(!ring->funcs->emit_rreg);
4210
4211         spin_lock_irqsave(&kiq->ring_lock, flags);
4212         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4213                 pr_err("critical bug! too many kiq readers\n");
4214                 goto failed_unlock;
4215         }
4216         amdgpu_ring_alloc(ring, 32);
4217         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4218         amdgpu_ring_write(ring, 9 |     /* src: gpu clock count */
4219                                 (5 << 8) |      /* dst: memory */
4220                                 (1 << 16) |     /* count sel */
4221                                 (1 << 20));     /* write confirm */
4222         amdgpu_ring_write(ring, 0);
4223         amdgpu_ring_write(ring, 0);
4224         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4225                                 reg_val_offs * 4));
4226         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4227                                 reg_val_offs * 4));
4228         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4229         if (r)
4230                 goto failed_undo;
4231
4232         amdgpu_ring_commit(ring);
4233         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4234
4235         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4236
4237         /* Don't wait any longer in the GPU reset case because doing so may
4238          * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
4239          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4240          * never return if we keep waiting in virt_kiq_rreg, which causes
4241          * gpu_recover() to hang there.
4242          *
4243          * Also don't wait any longer in IRQ context.
4244          */
4245         if (r < 1 && (amdgpu_in_reset(adev)))
4246                 goto failed_kiq_read;
4247
4248         might_sleep();
4249         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4250                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4251                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4252         }
4253
4254         if (cnt > MAX_KIQ_REG_TRY)
4255                 goto failed_kiq_read;
4256
4257         mb();
4258         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4259                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4260         amdgpu_device_wb_free(adev, reg_val_offs);
4261         return value;
4262
4263 failed_undo:
4264         amdgpu_ring_undo(ring);
4265 failed_unlock:
4266         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4267 failed_kiq_read:
4268         if (reg_val_offs)
4269                 amdgpu_device_wb_free(adev, reg_val_offs);
4270         pr_err("failed to read gpu clock\n");
4271         return ~0;
4272 }
4273
4274 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4275 {
4276         uint64_t clock, clock_lo, clock_hi, hi_check;
4277
4278         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4279         case IP_VERSION(9, 3, 0):
4280                 preempt_disable();
4281                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4282                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4283                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4284                 /* The SMUIO TSC runs at 100 MHz, so the lower 32 bits carry over
4285                  * roughly every 42 seconds.
4286                  */
4287                 if (hi_check != clock_hi) {
4288                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4289                         clock_hi = hi_check;
4290                 }
4291                 preempt_enable();
4292                 clock = clock_lo | (clock_hi << 32ULL);
4293                 break;
4294         default:
4295                 amdgpu_gfx_off_ctrl(adev, false);
4296                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4297                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4298                             IP_VERSION(9, 0, 1) &&
4299                     amdgpu_sriov_runtime(adev)) {
4300                         clock = gfx_v9_0_kiq_read_clock(adev);
4301                 } else {
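                             /* latch the current GPU clock into the RLC counter, then
                              * read the LSB/MSB halves
                              */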
4302                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4303                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4304                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4305                 }
4306                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4307                 amdgpu_gfx_off_ctrl(adev, true);
4308                 break;
4309         }
4310         return clock;
4311 }
4312
4313 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4314                                           uint32_t vmid,
4315                                           uint32_t gds_base, uint32_t gds_size,
4316                                           uint32_t gws_base, uint32_t gws_size,
4317                                           uint32_t oa_base, uint32_t oa_size)
4318 {
4319         struct amdgpu_device *adev = ring->adev;
4320
4321         /* GDS Base */
4322         gfx_v9_0_write_data_to_reg(ring, 0, false,
4323                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4324                                    gds_base);
4325
4326         /* GDS Size */
4327         gfx_v9_0_write_data_to_reg(ring, 0, false,
4328                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4329                                    gds_size);
4330
4331         /* GWS */
4332         gfx_v9_0_write_data_to_reg(ring, 0, false,
4333                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4334                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4335
4336         /* OA */
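             /* build a contiguous mask of oa_size bits starting at bit oa_base */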
4337         gfx_v9_0_write_data_to_reg(ring, 0, false,
4338                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4339                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4340 }
4341
4342 static const u32 vgpr_init_compute_shader[] =
4343 {
4344         0xb07c0000, 0xbe8000ff,
4345         0x000000f8, 0xbf110800,
4346         0x7e000280, 0x7e020280,
4347         0x7e040280, 0x7e060280,
4348         0x7e080280, 0x7e0a0280,
4349         0x7e0c0280, 0x7e0e0280,
4350         0x80808800, 0xbe803200,
4351         0xbf84fff5, 0xbf9c0000,
4352         0xd28c0001, 0x0001007f,
4353         0xd28d0001, 0x0002027e,
4354         0x10020288, 0xb8810904,
4355         0xb7814000, 0xd1196a01,
4356         0x00000301, 0xbe800087,
4357         0xbefc00c1, 0xd89c4000,
4358         0x00020201, 0xd89cc080,
4359         0x00040401, 0x320202ff,
4360         0x00000800, 0x80808100,
4361         0xbf84fff8, 0x7e020280,
4362         0xbf810000, 0x00000000,
4363 };
4364
4365 static const u32 sgpr_init_compute_shader[] =
4366 {
4367         0xb07c0000, 0xbe8000ff,
4368         0x0000005f, 0xbee50080,
4369         0xbe812c65, 0xbe822c65,
4370         0xbe832c65, 0xbe842c65,
4371         0xbe852c65, 0xb77c0005,
4372         0x80808500, 0xbf84fff8,
4373         0xbe800080, 0xbf810000,
4374 };
4375
4376 static const u32 vgpr_init_compute_shader_arcturus[] = {
4377         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4378         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4379         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4380         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4381         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4382         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4383         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4384         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4385         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4386         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4387         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4388         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4389         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4390         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4391         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4392         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4393         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4394         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4395         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4396         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4397         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4398         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4399         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4400         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4401         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4402         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4403         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4404         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4405         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4406         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4407         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4408         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4409         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4410         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4411         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4412         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4413         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4414         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4415         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4416         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4417         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4418         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4419         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4420         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4421         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4422         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4423         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4424         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4425         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4426         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4427         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4428         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4429         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4430         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4431         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4432         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4433         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4434         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4435         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4436         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4437         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4438         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4439         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4440         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4441         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4442         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4443         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4444         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4445         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4446         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4447         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4448         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4449         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4450         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4451         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4452         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4453         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4454         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4455         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4456         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4457         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4458         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4459         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4460         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4461         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4462         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4463         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4464         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4465         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4466         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4467         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4468         0xbf84fff8, 0xbf810000,
4469 };
4470
4471 /* When the register arrays below are changed, please update gpr_reg_size
4472  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4473  * to cover all gfx9 ASICs. */
4474 static const struct soc15_reg_entry vgpr_init_regs[] = {
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4489 };
4490
4491 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4492    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4493    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4494    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4500    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4506 };
4507
4508 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4509    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4510    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4511    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4515    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4516    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4517    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4518    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4519    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4520    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4523 };
4524
4525 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4526    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4527    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4528    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4529    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4530    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4531    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4532    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4533    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4534    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4535    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4536    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4537    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4538    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4539    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4540 };
4541
4542 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4543    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4544    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4545    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4546    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4547    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4548    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4549    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4550    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4551    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4552    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4553    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4554    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4555    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4556    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4557    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4558    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4559    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4560    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4561    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4562    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4563    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4564    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4565    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4566    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4567    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4568    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4569    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4570    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4571    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4572    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4573    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4574    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4575    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4576 };
4577
4578 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4579 {
4580         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4581         int i, r;
4582
4583         /* only supported when RAS is enabled */
4584         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4585                 return 0;
4586
4587         r = amdgpu_ring_alloc(ring, 7);
4588         if (r) {
4589                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4590                         ring->name, r);
4591                 return r;
4592         }
4593
4594         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4595         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4596
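             /* DMA-fill the entire GDS aperture so its EDC/ECC state is initialized */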
4597         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4598         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4599                                 PACKET3_DMA_DATA_DST_SEL(1) |
4600                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4601                                 PACKET3_DMA_DATA_ENGINE(0)));
4602         amdgpu_ring_write(ring, 0);
4603         amdgpu_ring_write(ring, 0);
4604         amdgpu_ring_write(ring, 0);
4605         amdgpu_ring_write(ring, 0);
4606         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4607                                 adev->gds.gds_size);
4608
4609         amdgpu_ring_commit(ring);
4610
4611         for (i = 0; i < adev->usec_timeout; i++) {
4612                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4613                         break;
4614                 udelay(1);
4615         }
4616
4617         if (i >= adev->usec_timeout)
4618                 r = -ETIMEDOUT;
4619
4620         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4621
4622         return r;
4623 }
4624
4625 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4626 {
4627         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4628         struct amdgpu_ib ib;
4629         struct dma_fence *f = NULL;
4630         int r, i;
4631         unsigned total_size, vgpr_offset, sgpr_offset;
4632         u64 gpu_addr;
4633
4634         int compute_dim_x = adev->gfx.config.max_shader_engines *
4635                                                 adev->gfx.config.max_cu_per_sh *
4636                                                 adev->gfx.config.max_sh_per_se;
4637         int sgpr_work_group_size = 5;
4638         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4639         int vgpr_init_shader_size;
4640         const u32 *vgpr_init_shader_ptr;
4641         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4642
4643         /* only support when RAS is enabled */
4644         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4645                 return 0;
4646
4647         /* bail if the compute ring is not ready */
4648         if (!ring->sched.ready)
4649                 return 0;
4650
4651         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4652                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4653                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4654                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4655         } else {
4656                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4657                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4658                 vgpr_init_regs_ptr = vgpr_init_regs;
4659         }
4660
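             /*
              * IB layout: the SET_SH_REG/DISPATCH packet stream for the three
              * dispatches below comes first, followed (256-byte aligned) by the
              * VGPR-init shader code and then the SGPR-init shader code.
              */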
4661         total_size =
4662                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4663         total_size +=
4664                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4665         total_size +=
4666                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4667         total_size = ALIGN(total_size, 256);
4668         vgpr_offset = total_size;
4669         total_size += ALIGN(vgpr_init_shader_size, 256);
4670         sgpr_offset = total_size;
4671         total_size += sizeof(sgpr_init_compute_shader);
4672
4673         /* allocate an indirect buffer to put the commands in */
4674         memset(&ib, 0, sizeof(ib));
4675         r = amdgpu_ib_get(adev, NULL, total_size,
4676                                         AMDGPU_IB_POOL_DIRECT, &ib);
4677         if (r) {
4678                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4679                 return r;
4680         }
4681
4682         /* load the compute shaders */
4683         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4684                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4685
4686         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4687                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4688
4689         /* init the ib length to 0 */
4690         ib.length_dw = 0;
4691
4692         /* VGPR */
4693         /* write the register state for the compute dispatch */
4694         for (i = 0; i < gpr_reg_size; i++) {
4695                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4696                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4697                                                                 - PACKET3_SET_SH_REG_START;
4698                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4699         }
4700         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4701         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4702         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4703         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4704                                                         - PACKET3_SET_SH_REG_START;
4705         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4706         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4707
4708         /* write dispatch packet */
4709         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4710         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4711         ib.ptr[ib.length_dw++] = 1; /* y */
4712         ib.ptr[ib.length_dw++] = 1; /* z */
4713         ib.ptr[ib.length_dw++] =
4714                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4715
4716         /* write CS partial flush packet */
4717         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4718         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4719
4720         /* SGPR1 */
4721         /* write the register state for the compute dispatch */
4722         for (i = 0; i < gpr_reg_size; i++) {
4723                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4724                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4725                                                                 - PACKET3_SET_SH_REG_START;
4726                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4727         }
4728         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4729         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4730         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4731         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4732                                                         - PACKET3_SET_SH_REG_START;
4733         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4734         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4735
4736         /* write dispatch packet */
4737         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4738         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4739         ib.ptr[ib.length_dw++] = 1; /* y */
4740         ib.ptr[ib.length_dw++] = 1; /* z */
4741         ib.ptr[ib.length_dw++] =
4742                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4743
4744         /* write CS partial flush packet */
4745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4746         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4747
4748         /* SGPR2 */
4749         /* write the register state for the compute dispatch */
4750         for (i = 0; i < gpr_reg_size; i++) {
4751                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4752                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4753                                                                 - PACKET3_SET_SH_REG_START;
4754                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4755         }
4756         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4757         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4758         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4759         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4760                                                         - PACKET3_SET_SH_REG_START;
4761         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4762         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4763
4764         /* write dispatch packet */
4765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4766         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4767         ib.ptr[ib.length_dw++] = 1; /* y */
4768         ib.ptr[ib.length_dw++] = 1; /* z */
4769         ib.ptr[ib.length_dw++] =
4770                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4771
4772         /* write CS partial flush packet */
4773         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4774         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4775
4776         /* schedule the ib on the ring */
4777         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4778         if (r) {
4779                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4780                 goto fail;
4781         }
4782
4783         /* wait for the GPU to finish processing the IB */
4784         r = dma_fence_wait(f, false);
4785         if (r) {
4786                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4787                 goto fail;
4788         }
4789
4790 fail:
4791         amdgpu_ib_free(&ib, NULL);
4792         dma_fence_put(f);
4793
4794         return r;
4795 }
4796
4797 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4798 {
4799         struct amdgpu_device *adev = ip_block->adev;
4800
4801         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4802
4803         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4804             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4805                 adev->gfx.num_gfx_rings = 0;
4806         else
4807                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4808         adev->gfx.xcc_mask = 1;
4809         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4810                                           AMDGPU_MAX_COMPUTE_RINGS);
4811         gfx_v9_0_set_kiq_pm4_funcs(adev);
4812         gfx_v9_0_set_ring_funcs(adev);
4813         gfx_v9_0_set_irq_funcs(adev);
4814         gfx_v9_0_set_gds_init(adev);
4815         gfx_v9_0_set_rlc_funcs(adev);
4816
4817         /* init rlcg reg access ctrl */
4818         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4819
4820         return gfx_v9_0_init_microcode(adev);
4821 }
4822
4823 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4824 {
4825         struct amdgpu_device *adev = ip_block->adev;
4826         int r;
4827
4828         /*
4829          * Temporary workaround: on several cards the CP firmware fails to
4830          * update the read pointer while CPDMA is writing the clearing
4831          * operation to GDS during the suspend/resume sequence, so limit
4832          * this operation to the cold boot sequence.
4833          */
4834         if ((!adev->in_suspend) &&
4835             (adev->gds.gds_size)) {
4836                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4837                 if (r)
4838                         return r;
4839         }
4840
4841         /* requires IBs so do in late init after IB pool is initialized */
4842         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4843                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4844         else
4845                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4846
4847         if (r)
4848                 return r;
4849
4850         if (adev->gfx.ras &&
4851             adev->gfx.ras->enable_watchdog_timer)
4852                 adev->gfx.ras->enable_watchdog_timer(adev);
4853
4854         return 0;
4855 }
4856
4857 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4858 {
4859         struct amdgpu_device *adev = ip_block->adev;
4860         int r;
4861
4862         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4863         if (r)
4864                 return r;
4865
4866         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4867         if (r)
4868                 return r;
4869
4870         r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4871         if (r)
4872                 return r;
4873
4874         r = gfx_v9_0_ecc_late_init(ip_block);
4875         if (r)
4876                 return r;
4877
4878         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4879                 gfx_v9_4_2_debug_trap_config_init(adev,
4880                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4881         else
4882                 gfx_v9_0_debug_trap_config_init(adev,
4883                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4884
4885         return 0;
4886 }
4887
4888 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4889 {
4890         uint32_t rlc_setting;
4891
4892         /* if RLC is not enabled, do nothing */
4893         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4894         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4895                 return false;
4896
4897         return true;
4898 }
4899
4900 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4901 {
4902         uint32_t data;
4903         unsigned i;
4904
4905         data = RLC_SAFE_MODE__CMD_MASK;
4906         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4907         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4908
4909         /* wait for RLC_SAFE_MODE */
4910         for (i = 0; i < adev->usec_timeout; i++) {
4911                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4912                         break;
4913                 udelay(1);
4914         }
4915 }
4916
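     /*
      * Exit RLC safe mode: write CMD with the MESSAGE field left at 0 (exit
      * request); unlike entry, completion is not polled here.
      */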
4917 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4918 {
4919         uint32_t data;
4920
4921         data = RLC_SAFE_MODE__CMD_MASK;
4922         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4923 }
4924
4925 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4926                                                 bool enable)
4927 {
4928         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4929
4930         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4931                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4932                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4933                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4934         } else {
4935                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4936                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4937                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4938         }
4939
4940         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4941 }
4942
4943 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4944                                                 bool enable)
4945 {
4946         /* TODO: double check if we need to perform under safe mode */
4947         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4948
4949         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4950                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4951         else
4952                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4953
4954         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4955                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4956         else
4957                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4958
4959         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4960 }
4961
4962 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4963                                                       bool enable)
4964 {
4965         uint32_t data, def;
4966
4967         /* It is disabled by HW by default */
4968         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4969                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4970                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4971
4972                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4973                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4974
4975                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4976                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4977                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4978
4979                 /* only for Vega10 & Raven1 */
4980                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4981
4982                 if (def != data)
4983                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4984
4985                 /* MGLS is a global flag to control all MGLS in GFX */
4986                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4987                         /* 2 - RLC memory Light sleep */
4988                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4989                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4990                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4991                                 if (def != data)
4992                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4993                         }
4994                         /* 3 - CP memory Light sleep */
4995                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4996                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4997                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4998                                 if (def != data)
4999                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5000                         }
5001                 }
5002         } else {
5003                 /* 1 - MGCG_OVERRIDE */
5004                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5005
5006                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
5007                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5008
5009                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5010                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5011                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5012                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5013
5014                 if (def != data)
5015                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5016
5017                 /* 2 - disable MGLS in RLC */
5018                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5019                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5020                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5021                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5022                 }
5023
5024                 /* 3 - disable MGLS in CP */
5025                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5026                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5027                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5028                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5029                 }
5030         }
5031 }
5032
5033 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5034                                            bool enable)
5035 {
5036         uint32_t data, def;
5037
5038         if (!adev->gfx.num_gfx_rings)
5039                 return;
5040
5041         /* Enable 3D CGCG/CGLS */
5042         if (enable) {
5043                 /* write cmd to clear cgcg/cgls ov */
5044                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5045                 /* unset CGCG override */
5046                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5047                 /* update CGCG and CGLS override bits */
5048                 if (def != data)
5049                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5050
5051                 /* enable 3Dcgcg FSM(0x0000363f) */
5052                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5053
5054                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5055                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5056                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5057                 else
5058                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5059
5060                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5061                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5062                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5063                 if (def != data)
5064                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5065
5066                 /* set IDLE_POLL_COUNT(0x00900100) */
5067                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5068                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5069                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5070                 if (def != data)
5071                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5072         } else {
5073                 /* Disable CGCG/CGLS */
5074                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5075                 /* disable cgcg, cgls should be disabled */
5076                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5077                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5078                 /* disable cgcg and cgls in FSM */
5079                 if (def != data)
5080                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5081         }
5082 }
5083
5084 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5085                                                       bool enable)
5086 {
5087         uint32_t def, data;
5088
5089         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5090                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5091                 /* unset CGCG override */
5092                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5093                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5094                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5095                 else
5096                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5097                 /* update CGCG and CGLS override bits */
5098                 if (def != data)
5099                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5100
5101                 /* enable cgcg FSM(0x0000363F) */
5102                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5103
5104                 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5105                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5106                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5107                 else
5108                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5109                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5110                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5111                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5112                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5113                 if (def != data)
5114                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5115
5116                 /* set IDLE_POLL_COUNT(0x00900100) */
5117                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5118                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5119                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5120                 if (def != data)
5121                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5122         } else {
5123                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5124                 /* reset CGCG/CGLS bits */
5125                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5126                 /* disable cgcg and cgls in FSM */
5127                 if (def != data)
5128                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5129         }
5130 }
5131
5132 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5133                                             bool enable)
5134 {
5135         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5136         if (enable) {
5137                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5138                  * ===  MGCG + MGLS ===
5139                  */
5140                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5141                 /* ===  CGCG /CGLS for GFX 3D Only === */
5142                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5143                 /* ===  CGCG + CGLS === */
5144                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5145         } else {
5146                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5147                  * ===  CGCG + CGLS ===
5148                  */
5149                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5150                 /* ===  CGCG /CGLS for GFX 3D Only === */
5151                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5152                 /* ===  MGCG + MGLS === */
5153                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5154         }
5155         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5156         return 0;
5157 }
5158
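     /*
      * Program the VMID used by the RLC streaming performance monitor (SPM)
      * for its memory accesses. With SR-IOV in one-VF mode the register is
      * read and written directly instead of going through the KIQ.
      */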
5159 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5160                                               unsigned int vmid)
5161 {
5162         u32 reg, data;
5163
5164         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5165         if (amdgpu_sriov_is_pp_one_vf(adev))
5166                 data = RREG32_NO_KIQ(reg);
5167         else
5168                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5169
5170         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5171         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5172
5173         if (amdgpu_sriov_is_pp_one_vf(adev))
5174                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5175         else
5176                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5177 }
5178
5179 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5180 {
5181         amdgpu_gfx_off_ctrl(adev, false);
5182
5183         gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5184
5185         amdgpu_gfx_off_ctrl(adev, true);
5186 }
5187
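     /*
      * Return true if @offset matches one of the registers in @entries, i.e.
      * an offset that falls in the RLCG access list used by
      * gfx_v9_0_is_rlcg_access_range() below.
      */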
5188 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5189                                         uint32_t offset,
5190                                         struct soc15_reg_rlcg *entries, int arr_size)
5191 {
5192         int i;
5193         uint32_t reg;
5194
5195         if (!entries)
5196                 return false;
5197
5198         for (i = 0; i < arr_size; i++) {
5199                 const struct soc15_reg_rlcg *entry;
5200
5201                 entry = &entries[i];
5202                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5203                 if (offset == reg)
5204                         return true;
5205         }
5206
5207         return false;
5208 }
5209
5210 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5211 {
5212         return gfx_v9_0_check_rlcg_range(adev, offset,
5213                                         (void *)rlcg_access_gc_9_0,
5214                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5215 }
5216
5217 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5218         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5219         .set_safe_mode = gfx_v9_0_set_safe_mode,
5220         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5221         .init = gfx_v9_0_rlc_init,
5222         .get_csb_size = gfx_v9_0_get_csb_size,
5223         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5224         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5225         .resume = gfx_v9_0_rlc_resume,
5226         .stop = gfx_v9_0_rlc_stop,
5227         .reset = gfx_v9_0_rlc_reset,
5228         .start = gfx_v9_0_rlc_start,
5229         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5230         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5231 };
5232
5233 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5234                                           enum amd_powergating_state state)
5235 {
5236         struct amdgpu_device *adev = ip_block->adev;
5237         bool enable = (state == AMD_PG_STATE_GATE);
5238
5239         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5240         case IP_VERSION(9, 2, 2):
5241         case IP_VERSION(9, 1, 0):
5242         case IP_VERSION(9, 3, 0):
5243                 if (!enable)
5244                         amdgpu_gfx_off_ctrl(adev, false);
5245
5246                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5247                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5248                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5249                 } else {
5250                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5251                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5252                 }
5253
5254                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5255                         gfx_v9_0_enable_cp_power_gating(adev, true);
5256                 else
5257                         gfx_v9_0_enable_cp_power_gating(adev, false);
5258
5259                 /* update gfx cgpg state */
5260                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5261
5262                 /* update mgcg state */
5263                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5264
5265                 if (enable)
5266                         amdgpu_gfx_off_ctrl(adev, true);
5267                 break;
5268         case IP_VERSION(9, 2, 1):
5269                 amdgpu_gfx_off_ctrl(adev, enable);
5270                 break;
5271         default:
5272                 break;
5273         }
5274
5275         return 0;
5276 }
5277
5278 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5279                                           enum amd_clockgating_state state)
5280 {
5281         struct amdgpu_device *adev = ip_block->adev;
5282
5283         if (amdgpu_sriov_vf(adev))
5284                 return 0;
5285
5286         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5287         case IP_VERSION(9, 0, 1):
5288         case IP_VERSION(9, 2, 1):
5289         case IP_VERSION(9, 4, 0):
5290         case IP_VERSION(9, 2, 2):
5291         case IP_VERSION(9, 1, 0):
5292         case IP_VERSION(9, 4, 1):
5293         case IP_VERSION(9, 3, 0):
5294         case IP_VERSION(9, 4, 2):
5295                 gfx_v9_0_update_gfx_clock_gating(adev,
5296                                                  state == AMD_CG_STATE_GATE);
5297                 break;
5298         default:
5299                 break;
5300         }
5301         return 0;
5302 }
5303
5304 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5305 {
5306         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5307         int data;
5308
5309         if (amdgpu_sriov_vf(adev))
5310                 *flags = 0;
5311
5312         /* AMD_CG_SUPPORT_GFX_MGCG */
5313         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5314         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5315                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5316
5317         /* AMD_CG_SUPPORT_GFX_CGCG */
5318         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5319         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5320                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5321
5322         /* AMD_CG_SUPPORT_GFX_CGLS */
5323         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5324                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5325
5326         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5327         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5328         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5329                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5330
5331         /* AMD_CG_SUPPORT_GFX_CP_LS */
5332         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5333         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5334                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5335
5336         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5337                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5338                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5339                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5340                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5341
5342                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5343                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5344                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5345         }
5346 }
5347
5348 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5349 {
5350         return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5351 }
5352
5353 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5354 {
5355         struct amdgpu_device *adev = ring->adev;
5356         u64 wptr;
5357
5358         /* XXX check if swapping is necessary on BE */
5359         if (ring->use_doorbell) {
5360                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5361         } else {
5362                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5363                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5364         }
5365
5366         return wptr;
5367 }
5368
5369 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5370 {
5371         struct amdgpu_device *adev = ring->adev;
5372
5373         if (ring->use_doorbell) {
5374                 /* XXX check if swapping is necessary on BE */
5375                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5376                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5377         } else {
5378                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5379                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5380         }
5381 }
5382
5383 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5384 {
5385         struct amdgpu_device *adev = ring->adev;
5386         u32 ref_and_mask, reg_mem_engine;
5387         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5388
5389         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5390                 switch (ring->me) {
5391                 case 1:
5392                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5393                         break;
5394                 case 2:
5395                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5396                         break;
5397                 default:
5398                         return;
5399                 }
5400                 reg_mem_engine = 0;
5401         } else {
5402                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5403                 reg_mem_engine = 1; /* pfp */
5404         }
5405
5406         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5407                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5408                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5409                               ref_and_mask, ref_and_mask, 0x20);
5410 }
5411
5412 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5413                                         struct amdgpu_job *job,
5414                                         struct amdgpu_ib *ib,
5415                                         uint32_t flags)
5416 {
5417         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5418         u32 header, control = 0;
5419
5420         if (ib->flags & AMDGPU_IB_FLAG_CE)
5421                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5422         else
5423                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5424
5425         control |= ib->length_dw | (vmid << 24);
5426
5427         if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5428                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5429
5430                 if (flags & AMDGPU_IB_PREEMPTED)
5431                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5432
5433                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5434                         gfx_v9_0_ring_emit_de_meta(ring,
5435                                                    (!amdgpu_sriov_vf(ring->adev) &&
5436                                                    flags & AMDGPU_IB_PREEMPTED) ?
5437                                                    true : false,
5438                                                    job->gds_size > 0 && job->gds_base != 0);
5439         }
5440
5441         amdgpu_ring_write(ring, header);
5442         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5443         amdgpu_ring_write(ring,
5444 #ifdef __BIG_ENDIAN
5445                 (2 << 0) |
5446 #endif
5447                 lower_32_bits(ib->gpu_addr));
5448         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5449         amdgpu_ring_ib_on_emit_cntl(ring);
5450         amdgpu_ring_write(ring, control);
5451 }
5452
5453 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5454                                      unsigned offset)
5455 {
5456         u32 control = ring->ring[offset];
5457
5458         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5459         ring->ring[offset] = control;
5460 }
5461
5462 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5463                                         unsigned offset)
5464 {
5465         struct amdgpu_device *adev = ring->adev;
5466         void *ce_payload_cpu_addr;
5467         uint64_t payload_offset, payload_size;
5468
5469         payload_size = sizeof(struct v9_ce_ib_state);
5470
5471         if (ring->is_mes_queue) {
5472                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5473                                           gfx[0].gfx_meta_data) +
5474                         offsetof(struct v9_gfx_meta_data, ce_payload);
5475                 ce_payload_cpu_addr =
5476                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5477         } else {
5478                 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5479                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5480         }
5481
5482         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5483                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5484         } else {
5485                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5486                        (ring->buf_mask + 1 - offset) << 2);
5487                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5488                 memcpy((void *)&ring->ring[0],
5489                        ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5490                        payload_size);
5491         }
5492 }
5493
5494 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5495                                         unsigned offset)
5496 {
5497         struct amdgpu_device *adev = ring->adev;
5498         void *de_payload_cpu_addr;
5499         uint64_t payload_offset, payload_size;
5500
5501         payload_size = sizeof(struct v9_de_ib_state);
5502
5503         if (ring->is_mes_queue) {
5504                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5505                                           gfx[0].gfx_meta_data) +
5506                         offsetof(struct v9_gfx_meta_data, de_payload);
5507                 de_payload_cpu_addr =
5508                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5509         } else {
5510                 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5511                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5512         }
5513
5514         ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5515                 IB_COMPLETION_STATUS_PREEMPTED;
5516
5517         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5518                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5519         } else {
5520                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5521                        (ring->buf_mask + 1 - offset) << 2);
5522                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5523                 memcpy((void *)&ring->ring[0],
5524                        de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5525                        payload_size);
5526         }
5527 }
5528
5529 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5530                                           struct amdgpu_job *job,
5531                                           struct amdgpu_ib *ib,
5532                                           uint32_t flags)
5533 {
5534         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5535         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5536
5537         /* Currently there is a high probability of a wave ID mismatch
5538          * between ME and GDS, leading to a HW deadlock, because ME generates
5539          * different wave IDs than the GDS expects. This situation happens
5540          * randomly when at least 5 compute pipes use GDS ordered append.
5541          * The wave IDs generated by ME are also wrong after suspend/resume.
5542          * Those are probably bugs somewhere else in the kernel driver.
5543          *
5544          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5545          * GDS to 0 for this ring (me/pipe).
5546          */
5547         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5548                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5549                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5550                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5551         }
5552
5553         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5554         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5555         amdgpu_ring_write(ring,
5556 #ifdef __BIG_ENDIAN
5557                                 (2 << 0) |
5558 #endif
5559                                 lower_32_bits(ib->gpu_addr));
5560         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5561         amdgpu_ring_write(ring, control);
5562 }
5563
5564 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5565                                      u64 seq, unsigned flags)
5566 {
5567         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5568         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5569         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5570         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5571         uint32_t dw2 = 0;
5572
5573         /* RELEASE_MEM - flush caches, send int */
5574         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5575
5576         if (writeback) {
5577                 dw2 = EOP_TC_NC_ACTION_EN;
5578         } else {
5579                 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5580                                 EOP_TC_MD_ACTION_EN;
5581         }
5582         dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5583                                 EVENT_INDEX(5);
5584         if (exec)
5585                 dw2 |= EOP_EXEC;
5586
5587         amdgpu_ring_write(ring, dw2);
5588         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5589
5590         /*
5591          * The address must be Qword aligned for a 64-bit write, and Dword
5592          * aligned if we only send the low 32 bits of data (high bits discarded).
5593          */
5594         if (write64bit)
5595                 BUG_ON(addr & 0x7);
5596         else
5597                 BUG_ON(addr & 0x3);
5598         amdgpu_ring_write(ring, lower_32_bits(addr));
5599         amdgpu_ring_write(ring, upper_32_bits(addr));
5600         amdgpu_ring_write(ring, lower_32_bits(seq));
5601         amdgpu_ring_write(ring, upper_32_bits(seq));
5602         amdgpu_ring_write(ring, 0);
5603 }
5604
5605 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5606 {
5607         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5608         uint32_t seq = ring->fence_drv.sync_seq;
5609         uint64_t addr = ring->fence_drv.gpu_addr;
5610
5611         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5612                               lower_32_bits(addr), upper_32_bits(addr),
5613                               seq, 0xffffffff, 4);
5614 }
5615
5616 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5617                                         unsigned vmid, uint64_t pd_addr)
5618 {
5619         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5620
5621         /* compute doesn't have PFP */
5622         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5623                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5624                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5625                 amdgpu_ring_write(ring, 0x0);
5626         }
5627 }
5628
5629 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5630 {
5631         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5632 }
5633
5634 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5635 {
5636         u64 wptr;
5637
5638         /* XXX check if swapping is necessary on BE */
5639         if (ring->use_doorbell)
5640                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5641         else
5642                 BUG();
5643         return wptr;
5644 }
5645
5646 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5647 {
5648         struct amdgpu_device *adev = ring->adev;
5649
5650         /* XXX check if swapping is necessary on BE */
5651         if (ring->use_doorbell) {
5652                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5653                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5654         } else {
5655                 BUG(); /* only DOORBELL method supported on gfx9 now */
5656         }
5657 }
5658
5659 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5660                                          u64 seq, unsigned int flags)
5661 {
5662         struct amdgpu_device *adev = ring->adev;
5663
5664         /* we only allocate 32bit for each seq wb address */
5665         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5666
5667         /* write fence seq to the "addr" */
5668         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5669         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5670                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5671         amdgpu_ring_write(ring, lower_32_bits(addr));
5672         amdgpu_ring_write(ring, upper_32_bits(addr));
5673         amdgpu_ring_write(ring, lower_32_bits(seq));
5674
5675         if (flags & AMDGPU_FENCE_FLAG_INT) {
5676                 /* set register to trigger INT */
5677                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5678                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5679                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5680                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5681                 amdgpu_ring_write(ring, 0);
5682                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5683         }
5684 }
5685
5686 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5687 {
5688         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5689         amdgpu_ring_write(ring, 0);
5690 }
5691
5692 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5693 {
5694         struct amdgpu_device *adev = ring->adev;
5695         struct v9_ce_ib_state ce_payload = {0};
5696         uint64_t offset, ce_payload_gpu_addr;
5697         void *ce_payload_cpu_addr;
5698         int cnt;
5699
5700         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5701
5702         if (ring->is_mes_queue) {
5703                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5704                                   gfx[0].gfx_meta_data) +
5705                         offsetof(struct v9_gfx_meta_data, ce_payload);
5706                 ce_payload_gpu_addr =
5707                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5708                 ce_payload_cpu_addr =
5709                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5710         } else {
5711                 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5712                 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5713                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5714         }
5715
5716         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5717         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5718                                  WRITE_DATA_DST_SEL(8) |
5719                                  WR_CONFIRM) |
5720                                  WRITE_DATA_CACHE_POLICY(0));
5721         amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5722         amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5723
5724         amdgpu_ring_ib_on_emit_ce(ring);
5725
5726         if (resume)
5727                 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5728                                            sizeof(ce_payload) >> 2);
5729         else
5730                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5731                                            sizeof(ce_payload) >> 2);
5732 }
5733
5734 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5735 {
5736         int i, r = 0;
5737         struct amdgpu_device *adev = ring->adev;
5738         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5739         struct amdgpu_ring *kiq_ring = &kiq->ring;
5740         unsigned long flags;
5741
5742         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5743                 return -EINVAL;
5744
5745         spin_lock_irqsave(&kiq->ring_lock, flags);
5746
5747         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5748                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5749                 return -ENOMEM;
5750         }
5751
5752         /* assert preemption condition */
5753         amdgpu_ring_set_preempt_cond_exec(ring, false);
5754
5755         ring->trail_seq += 1;
5756         amdgpu_ring_alloc(ring, 13);
5757         gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5758                                  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5759
5760         /* assert IB preemption, emit the trailing fence */
5761         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5762                                    ring->trail_fence_gpu_addr,
5763                                    ring->trail_seq);
5764
5765         amdgpu_ring_commit(kiq_ring);
5766         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5767
5768         /* poll the trailing fence */
5769         for (i = 0; i < adev->usec_timeout; i++) {
5770                 if (ring->trail_seq ==
5771                         le32_to_cpu(*ring->trail_fence_cpu_addr))
5772                         break;
5773                 udelay(1);
5774         }
5775
5776         if (i >= adev->usec_timeout) {
5777                 r = -EINVAL;
5778                 DRM_WARN("ring %d: timed out preempting ib\n", ring->idx);
5779         }
5780
5781         /* reset the CP_VMID_PREEMPT after trailing fence */
5782         amdgpu_ring_emit_wreg(ring,
5783                               SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5784                               0x0);
5785         amdgpu_ring_commit(ring);
5786
5787         /* deassert preemption condition */
5788         amdgpu_ring_set_preempt_cond_exec(ring, true);
5789         return r;
5790 }
5791
5792 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5793 {
5794         struct amdgpu_device *adev = ring->adev;
5795         struct v9_de_ib_state de_payload = {0};
5796         uint64_t offset, gds_addr, de_payload_gpu_addr;
5797         void *de_payload_cpu_addr;
5798         int cnt;
5799
5800         if (ring->is_mes_queue) {
5801                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5802                                   gfx[0].gfx_meta_data) +
5803                         offsetof(struct v9_gfx_meta_data, de_payload);
5804                 de_payload_gpu_addr =
5805                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5806                 de_payload_cpu_addr =
5807                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5808
5809                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5810                                   gfx[0].gds_backup) +
5811                         offsetof(struct v9_gfx_meta_data, de_payload);
5812                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5813         } else {
5814                 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5815                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5816                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5817
5818                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5819                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5820                                  PAGE_SIZE);
5821         }
5822
5823         if (usegds) {
5824                 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5825                 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5826         }
5827
5828         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5829         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5830         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5831                                  WRITE_DATA_DST_SEL(8) |
5832                                  WR_CONFIRM) |
5833                                  WRITE_DATA_CACHE_POLICY(0));
5834         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5835         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5836
5837         amdgpu_ring_ib_on_emit_de(ring);
5838         if (resume)
5839                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5840                                            sizeof(de_payload) >> 2);
5841         else
5842                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5843                                            sizeof(de_payload) >> 2);
5844 }
5845
5846 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5847                                    bool secure)
5848 {
5849         uint32_t v = secure ? FRAME_TMZ : 0;
5850
5851         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5852         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5853 }
5854
5855 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5856 {
5857         uint32_t dw2 = 0;
5858
5859         gfx_v9_0_ring_emit_ce_meta(ring,
5860                                    (!amdgpu_sriov_vf(ring->adev) &&
5861                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
5862
5863         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5864         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5865                 /* set load_global_config & load_global_uconfig */
5866                 dw2 |= 0x8001;
5867                 /* set load_cs_sh_regs */
5868                 dw2 |= 0x01000000;
5869                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5870                 dw2 |= 0x10002;
5871
5872                 /* set load_ce_ram if a preamble is present */
5873                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5874                         dw2 |= 0x10000000;
5875         } else {
5876                 /* still load_ce_ram if a preamble is present for the first time,
5877                  * even though no context switch happens.
5878                  */
5879                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5880                         dw2 |= 0x10000000;
5881         }
5882
5883         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5884         amdgpu_ring_write(ring, dw2);
5885         amdgpu_ring_write(ring, 0);
5886 }
5887
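/*
 * Emit a COND_EXEC packet referencing @addr.  The packet makes the CP skip
 * the following DWs when the value at @addr is zero.  The DW count is not
 * known yet, so a dummy value is written and its ring offset is returned so
 * the caller can patch in the real count later.
 */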
5888 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5889                                                   uint64_t addr)
5890 {
5891         unsigned ret;
5892         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5893         amdgpu_ring_write(ring, lower_32_bits(addr));
5894         amdgpu_ring_write(ring, upper_32_bits(addr));
5895         /* discard following DWs if *cond_exec_gpu_addr==0 */
5896         amdgpu_ring_write(ring, 0);
5897         ret = ring->wptr & ring->buf_mask;
5898         /* patch dummy value later */
5899         amdgpu_ring_write(ring, 0);
5900         return ret;
5901 }
5902
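/*
 * Read a register from the ring: a COPY_DATA packet copies the register
 * value into the writeback buffer at @reg_val_offs, with write confirmation
 * enabled so the caller can poll for the result.
 */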
5903 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5904                                     uint32_t reg_val_offs)
5905 {
5906         struct amdgpu_device *adev = ring->adev;
5907
5908         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5909         amdgpu_ring_write(ring, 0 |     /* src: register */
5910                                 (5 << 8) |      /* dst: memory */
5911                                 (1 << 20));     /* write confirm */
5912         amdgpu_ring_write(ring, reg);
5913         amdgpu_ring_write(ring, 0);
5914         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5915                                 reg_val_offs * 4));
5916         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5917                                 reg_val_offs * 4));
5918 }
5919
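/*
 * Write @val to register @reg from the ring using a WRITE_DATA packet.  The
 * control word depends on the ring type: gfx rings add an engine select on
 * top of write confirmation, KIQ rings use the no-increment-address
 * encoding, and all other rings only request write confirmation.
 */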
5920 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5921                                     uint32_t val)
5922 {
5923         uint32_t cmd = 0;
5924
5925         switch (ring->funcs->type) {
5926         case AMDGPU_RING_TYPE_GFX:
5927                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5928                 break;
5929         case AMDGPU_RING_TYPE_KIQ:
5930                 cmd = (1 << 16); /* no inc addr */
5931                 break;
5932         default:
5933                 cmd = WR_CONFIRM;
5934                 break;
5935         }
5936         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5937         amdgpu_ring_write(ring, cmd);
5938         amdgpu_ring_write(ring, reg);
5939         amdgpu_ring_write(ring, 0);
5940         amdgpu_ring_write(ring, val);
5941 }
5942
5943 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5944                                         uint32_t val, uint32_t mask)
5945 {
5946         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5947 }
5948
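/*
 * Write @ref to @reg0 and then wait for @reg1 to match @ref under @mask.
 * When the CP firmware supports it (me/mec_fw_write_wait), this is done
 * with a single WAIT_REG_MEM packet; otherwise the generic write-then-wait
 * helper is used.
 */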
5949 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5950                                                   uint32_t reg0, uint32_t reg1,
5951                                                   uint32_t ref, uint32_t mask)
5952 {
5953         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5954         struct amdgpu_device *adev = ring->adev;
5955         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5956                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5957
5958         if (fw_version_ok)
5959                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5960                                       ref, mask, 0x20);
5961         else
5962                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5963                                                            ref, mask);
5964 }
5965
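/*
 * Soft ring recovery: issue an SQ_CMD targeting waves that belong to @vmid
 * (CHECK_VMID is set so only that VMID is affected), wrapped in RLC safe
 * mode.  This is used to try to kill a hung job without a full GPU reset.
 */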
5966 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5967 {
5968         struct amdgpu_device *adev = ring->adev;
5969         uint32_t value = 0;
5970
5971         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5972         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5973         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5974         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5975         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5976         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5977         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5978 }
5979
5980 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5981                                                  enum amdgpu_interrupt_state state)
5982 {
5983         switch (state) {
5984         case AMDGPU_IRQ_STATE_DISABLE:
5985         case AMDGPU_IRQ_STATE_ENABLE:
5986                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5987                                TIME_STAMP_INT_ENABLE,
5988                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5989                 break;
5990         default:
5991                 break;
5992         }
5993 }
5994
5995 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5996                                                      int me, int pipe,
5997                                                      enum amdgpu_interrupt_state state)
5998 {
5999         u32 mec_int_cntl, mec_int_cntl_reg;
6000
6001         /*
6002          * amdgpu controls only the first MEC. That's why this function only
6003          * handles the setting of interrupts for this specific MEC. All other
6004          * pipes' interrupts are set by amdkfd.
6005          */
6006
6007         if (me == 1) {
6008                 switch (pipe) {
6009                 case 0:
6010                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6011                         break;
6012                 case 1:
6013                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6014                         break;
6015                 case 2:
6016                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6017                         break;
6018                 case 3:
6019                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6020                         break;
6021                 default:
6022                         DRM_DEBUG("invalid pipe %d\n", pipe);
6023                         return;
6024                 }
6025         } else {
6026                 DRM_DEBUG("invalid me %d\n", me);
6027                 return;
6028         }
6029
6030         switch (state) {
6031         case AMDGPU_IRQ_STATE_DISABLE:
6032                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6033                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6034                                              TIME_STAMP_INT_ENABLE, 0);
6035                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6036                 break;
6037         case AMDGPU_IRQ_STATE_ENABLE:
6038                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6039                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6040                                              TIME_STAMP_INT_ENABLE, 1);
6041                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6042                 break;
6043         default:
6044                 break;
6045         }
6046 }
6047
6048 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6049                                      int me, int pipe)
6050 {
6051         /*
6052          * amdgpu controls only the first MEC. That's why this function only
6053          * handles the setting of interrupts for this specific MEC. All other
6054          * pipes' interrupts are set by amdkfd.
6055          */
6056         if (me != 1)
6057                 return 0;
6058
6059         switch (pipe) {
6060         case 0:
6061                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6062         case 1:
6063                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6064         case 2:
6065                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6066         case 3:
6067                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6068         default:
6069                 return 0;
6070         }
6071 }
6072
6073 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6074                                              struct amdgpu_irq_src *source,
6075                                              unsigned type,
6076                                              enum amdgpu_interrupt_state state)
6077 {
6078         u32 cp_int_cntl_reg, cp_int_cntl;
6079         int i, j;
6080
6081         switch (state) {
6082         case AMDGPU_IRQ_STATE_DISABLE:
6083         case AMDGPU_IRQ_STATE_ENABLE:
6084                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6085                                PRIV_REG_INT_ENABLE,
6086                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6087                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6088                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6089                                 /* MECs start at 1 */
6090                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6091
6092                                 if (cp_int_cntl_reg) {
6093                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6094                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6095                                                                     PRIV_REG_INT_ENABLE,
6096                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6097                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6098                                 }
6099                         }
6100                 }
6101                 break;
6102         default:
6103                 break;
6104         }
6105
6106         return 0;
6107 }
6108
6109 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6110                                            struct amdgpu_irq_src *source,
6111                                            unsigned type,
6112                                            enum amdgpu_interrupt_state state)
6113 {
6114         u32 cp_int_cntl_reg, cp_int_cntl;
6115         int i, j;
6116
6117         switch (state) {
6118         case AMDGPU_IRQ_STATE_DISABLE:
6119         case AMDGPU_IRQ_STATE_ENABLE:
6120                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6121                                OPCODE_ERROR_INT_ENABLE,
6122                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6123                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6124                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6125                                 /* MECs start at 1 */
6126                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6127
6128                                 if (cp_int_cntl_reg) {
6129                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6130                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6131                                                                     OPCODE_ERROR_INT_ENABLE,
6132                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6133                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6134                                 }
6135                         }
6136                 }
6137                 break;
6138         default:
6139                 break;
6140         }
6141
6142         return 0;
6143 }
6144
6145 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6146                                               struct amdgpu_irq_src *source,
6147                                               unsigned type,
6148                                               enum amdgpu_interrupt_state state)
6149 {
6150         switch (state) {
6151         case AMDGPU_IRQ_STATE_DISABLE:
6152         case AMDGPU_IRQ_STATE_ENABLE:
6153                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6154                                PRIV_INSTR_INT_ENABLE,
6155                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6156                 break;
6157         default:
6158                 break;
6159         }
6160
6161         return 0;
6162 }
6163
6164 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
6165         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6166                         CP_ECC_ERROR_INT_ENABLE, 1)
6167
6168 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
6169         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6170                         CP_ECC_ERROR_INT_ENABLE, 0)
6171
6172 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6173                                               struct amdgpu_irq_src *source,
6174                                               unsigned type,
6175                                               enum amdgpu_interrupt_state state)
6176 {
6177         switch (state) {
6178         case AMDGPU_IRQ_STATE_DISABLE:
6179                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6180                                 CP_ECC_ERROR_INT_ENABLE, 0);
6181                 DISABLE_ECC_ON_ME_PIPE(1, 0);
6182                 DISABLE_ECC_ON_ME_PIPE(1, 1);
6183                 DISABLE_ECC_ON_ME_PIPE(1, 2);
6184                 DISABLE_ECC_ON_ME_PIPE(1, 3);
6185                 break;
6186
6187         case AMDGPU_IRQ_STATE_ENABLE:
6188                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6189                                 CP_ECC_ERROR_INT_ENABLE, 1);
6190                 ENABLE_ECC_ON_ME_PIPE(1, 0);
6191                 ENABLE_ECC_ON_ME_PIPE(1, 1);
6192                 ENABLE_ECC_ON_ME_PIPE(1, 2);
6193                 ENABLE_ECC_ON_ME_PIPE(1, 3);
6194                 break;
6195         default:
6196                 break;
6197         }
6198
6199         return 0;
6200 }
6201
6202
6203 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6204                                             struct amdgpu_irq_src *src,
6205                                             unsigned type,
6206                                             enum amdgpu_interrupt_state state)
6207 {
6208         switch (type) {
6209         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6210                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6211                 break;
6212         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6213                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6214                 break;
6215         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6216                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6217                 break;
6218         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6219                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6220                 break;
6221         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6222                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6223                 break;
6224         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6225                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6226                 break;
6227         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6228                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6229                 break;
6230         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6231                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6232                 break;
6233         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6234                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6235                 break;
6236         default:
6237                 break;
6238         }
6239         return 0;
6240 }
6241
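/*
 * EOP (end of pipe) interrupt handler.  The me/pipe/queue encoded in
 * entry->ring_id selects which ring's fences to process: ME0 is the gfx
 * ring (or the software gfx rings when mid-command-buffer preemption is
 * enabled), ME1/ME2 are the compute rings.
 */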
6242 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6243                             struct amdgpu_irq_src *source,
6244                             struct amdgpu_iv_entry *entry)
6245 {
6246         int i;
6247         u8 me_id, pipe_id, queue_id;
6248         struct amdgpu_ring *ring;
6249
6250         DRM_DEBUG("IH: CP EOP\n");
6251         me_id = (entry->ring_id & 0x0c) >> 2;
6252         pipe_id = (entry->ring_id & 0x03) >> 0;
6253         queue_id = (entry->ring_id & 0x70) >> 4;
6254
6255         switch (me_id) {
6256         case 0:
6257                 if (adev->gfx.num_gfx_rings) {
6258                         if (!adev->gfx.mcbp) {
6259                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6260                         } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6261                                 /* Fence signals are handled on the software rings */
6262                                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6263                                         amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6264                         }
6265                 }
6266                 break;
6267         case 1:
6268         case 2:
6269                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6270                         ring = &adev->gfx.compute_ring[i];
6271                         /* Per-queue interrupt is supported for MEC starting from VI.
6272                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6273                          */
6274                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6275                                 amdgpu_fence_process(ring);
6276                 }
6277                 break;
6278         }
6279         return 0;
6280 }
6281
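/*
 * Common handler for CP fault interrupts: decode me/pipe/queue from the IV
 * entry and report a scheduler fault on the matching ring so the offending
 * job is handled by the scheduler's timeout path.
 */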
6282 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6283                            struct amdgpu_iv_entry *entry)
6284 {
6285         u8 me_id, pipe_id, queue_id;
6286         struct amdgpu_ring *ring;
6287         int i;
6288
6289         me_id = (entry->ring_id & 0x0c) >> 2;
6290         pipe_id = (entry->ring_id & 0x03) >> 0;
6291         queue_id = (entry->ring_id & 0x70) >> 4;
6292
6293         switch (me_id) {
6294         case 0:
6295                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6296                 break;
6297         case 1:
6298         case 2:
6299                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6300                         ring = &adev->gfx.compute_ring[i];
6301                         if (ring->me == me_id && ring->pipe == pipe_id &&
6302                             ring->queue == queue_id)
6303                                 drm_sched_fault(&ring->sched);
6304                 }
6305                 break;
6306         }
6307 }
6308
6309 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6310                                  struct amdgpu_irq_src *source,
6311                                  struct amdgpu_iv_entry *entry)
6312 {
6313         DRM_ERROR("Illegal register access in command stream\n");
6314         gfx_v9_0_fault(adev, entry);
6315         return 0;
6316 }
6317
6318 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6319                                struct amdgpu_irq_src *source,
6320                                struct amdgpu_iv_entry *entry)
6321 {
6322         DRM_ERROR("Illegal opcode in command stream\n");
6323         gfx_v9_0_fault(adev, entry);
6324         return 0;
6325 }
6326
6327 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6328                                   struct amdgpu_irq_src *source,
6329                                   struct amdgpu_iv_entry *entry)
6330 {
6331         DRM_ERROR("Illegal instruction in command stream\n");
6332         gfx_v9_0_fault(adev, entry);
6333         return 0;
6334 }
6335
6336
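/*
 * Table of EDC counter registers and the SEC/DED (correctable/uncorrectable)
 * bit-fields within them.  gfx_v9_0_ras_error_count() walks this table to
 * decode the raw counter values read in gfx_v9_0_query_ras_error_count().
 * Entries with a zero DED mask only report single-error-detect (SED) counts.
 */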
6337 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6338         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6339           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6340           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6341         },
6342         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6343           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6344           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6345         },
6346         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6347           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6348           0, 0
6349         },
6350         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6351           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6352           0, 0
6353         },
6354         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6355           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6356           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6357         },
6358         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6359           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6360           0, 0
6361         },
6362         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6363           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6364           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6365         },
6366         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6367           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6368           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6369         },
6370         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6371           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6372           0, 0
6373         },
6374         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6375           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6376           0, 0
6377         },
6378         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6379           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6380           0, 0
6381         },
6382         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6383           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6384           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6385         },
6386         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6387           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6388           0, 0
6389         },
6390         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6391           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6392           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6393         },
6394         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6395           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6396           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6397           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6398         },
6399         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6400           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6401           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6402           0, 0
6403         },
6404         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6405           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6406           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6407           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6408         },
6409         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6410           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6411           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6412           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6413         },
6414         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6415           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6416           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6417           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6418         },
6419         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6420           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6421           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6422           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6423         },
6424         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6425           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6426           0, 0
6427         },
6428         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6429           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6430           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6431         },
6432         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6433           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6434           0, 0
6435         },
6436         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6437           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6438           0, 0
6439         },
6440         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6441           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6442           0, 0
6443         },
6444         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6445           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6446           0, 0
6447         },
6448         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6449           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6450           0, 0
6451         },
6452         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6453           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6454           0, 0
6455         },
6456         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6457           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6458           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6459         },
6460         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6461           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6462           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6463         },
6464         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6465           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6466           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6467         },
6468         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6469           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6470           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6471         },
6472         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6473           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6474           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6475         },
6476         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6477           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6478           0, 0
6479         },
6480         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6481           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6482           0, 0
6483         },
6484         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6485           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6486           0, 0
6487         },
6488         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6489           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6490           0, 0
6491         },
6492         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6493           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6494           0, 0
6495         },
6496         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6497           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6498           0, 0
6499         },
6500         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6501           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6502           0, 0
6503         },
6504         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6505           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6506           0, 0
6507         },
6508         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6509           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6510           0, 0
6511         },
6512         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6513           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6514           0, 0
6515         },
6516         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6517           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6518           0, 0
6519         },
6520         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6521           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6522           0, 0
6523         },
6524         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6525           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6526           0, 0
6527         },
6528         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6529           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6530           0, 0
6531         },
6532         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6533           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6534           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6535         },
6536         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6537           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6538           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6539         },
6540         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6541           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6542           0, 0
6543         },
6544         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6545           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6546           0, 0
6547         },
6548         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6549           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6550           0, 0
6551         },
6552         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6553           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6554           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6555         },
6556         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6557           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6558           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6559         },
6560         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6561           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6562           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6563         },
6564         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6565           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6566           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6567         },
6568         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6569           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6570           0, 0
6571         },
6572         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6573           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6574           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6575         },
6576         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6577           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6578           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6579         },
6580         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6581           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6582           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6583         },
6584         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6585           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6586           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6587         },
6588         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6589           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6590           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6591         },
6592         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6593           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6594           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6595         },
6596         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6597           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6598           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6599         },
6600         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6601           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6602           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6603         },
6604         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6605           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6606           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6607         },
6608         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6609           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6610           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6611         },
6612         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6613           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6614           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6615         },
6616         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6617           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6618           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6619         },
6620         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6621           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6622           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6623         },
6624         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6625           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6626           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6627         },
6628         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6629           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6630           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6631         },
6632         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6633           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6634           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6635         },
6636         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6637           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6638           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6639         },
6640         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6641           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6642           0, 0
6643         },
6644         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6645           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6646           0, 0
6647         },
6648         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6649           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6650           0, 0
6651         },
6652         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6653           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6654           0, 0
6655         },
6656         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6657           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6658           0, 0
6659         },
6660         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6661           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6662           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6663         },
6664         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6665           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6666           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6667         },
6668         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6669           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6670           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6671         },
6672         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6673           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6674           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6675         },
6676         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6677           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6678           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6679         },
6680         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6681           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6682           0, 0
6683         },
6684         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6685           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6686           0, 0
6687         },
6688         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6689           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6690           0, 0
6691         },
6692         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6693           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6694           0, 0
6695         },
6696         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6697           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6698           0, 0
6699         },
6700         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6701           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6702           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6703         },
6704         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6705           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6706           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6707         },
6708         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6709           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6710           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6711         },
6712         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6713           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6714           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6715         },
6716         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6717           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6718           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6719         },
6720         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6721           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6722           0, 0
6723         },
6724         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6725           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6726           0, 0
6727         },
6728         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6729           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6730           0, 0
6731         },
6732         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6733           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6734           0, 0
6735         },
6736         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6737           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6738           0, 0
6739         },
6740         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6741           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6742           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6743         },
6744         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6745           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6746           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6747         },
6748         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6749           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6750           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6751         },
6752         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6753           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6754           0, 0
6755         },
6756         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6757           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6758           0, 0
6759         },
6760         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6761           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6762           0, 0
6763         },
6764         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6765           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6766           0, 0
6767         },
6768         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6769           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6770           0, 0
6771         },
6772         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6773           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6774           0, 0
6775         }
6776 };
6777
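/*
 * Inject a RAS error into a GFX sub-block via the PSP RAS TA.  The request
 * is validated against the ras_gfx_subblocks table before being translated
 * into a ta_ras_trigger_error_input and handed to psp_ras_trigger_error().
 */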
6778 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6779                                      void *inject_if, uint32_t instance_mask)
6780 {
6781         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6782         int ret;
6783         struct ta_ras_trigger_error_input block_info = { 0 };
6784
6785         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6786                 return -EINVAL;
6787
6788         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6789                 return -EINVAL;
6790
6791         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6792                 return -EPERM;
6793
6794         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6795               info->head.type)) {
6796                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6797                         ras_gfx_subblocks[info->head.sub_block_index].name,
6798                         info->head.type);
6799                 return -EPERM;
6800         }
6801
6802         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6803               info->head.type)) {
6804                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6805                         ras_gfx_subblocks[info->head.sub_block_index].name,
6806                         info->head.type);
6807                 return -EPERM;
6808         }
6809
6810         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6811         block_info.sub_block_index =
6812                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6813         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6814         block_info.address = info->address;
6815         block_info.value = info->value;
6816
6817         mutex_lock(&adev->grbm_idx_mutex);
6818         ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6819         mutex_unlock(&adev->grbm_idx_mutex);
6820
6821         return ret;
6822 }
6823
6824 static const char * const vml2_mems[] = {
6825         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6826         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6827         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6828         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6829         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6830         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6831         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6832         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6833         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6834         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6835         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6836         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6837         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6838         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6839         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6840         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6841 };
6842
6843 static const char * const vml2_walker_mems[] = {
6844         "UTC_VML2_CACHE_PDE0_MEM0",
6845         "UTC_VML2_CACHE_PDE0_MEM1",
6846         "UTC_VML2_CACHE_PDE1_MEM0",
6847         "UTC_VML2_CACHE_PDE1_MEM1",
6848         "UTC_VML2_CACHE_PDE2_MEM0",
6849         "UTC_VML2_CACHE_PDE2_MEM1",
6850         "UTC_VML2_RDIF_LOG_FIFO",
6851 };
6852
6853 static const char * const atc_l2_cache_2m_mems[] = {
6854         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6855         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6856         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6857         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6858 };
6859
6860 static const char * const atc_l2_cache_4k_mems[] = {
6861         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6862         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6863         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6864         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6865         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6866         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6867         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6868         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6869         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6870         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6871         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6872         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6873         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6874         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6875         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6876         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6877         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6878         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6879         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6880         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6881         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6882         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6883         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6884         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6885         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6886         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6887         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6888         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6889         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6890         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6891         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6892         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6893 };
6894
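/*
 * Collect EDC counts for the UTC memories (VML2 bank caches, VML2 walker
 * memories and the ATC L2 2M/4K caches).  Each instance is selected through
 * the corresponding *_INDEX register, the counter is read and accumulated
 * into ce_count (SEC) / ue_count (DED), and the index registers are reset
 * to 255 afterwards.
 */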
6895 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6896                                          struct ras_err_data *err_data)
6897 {
6898         uint32_t i, data;
6899         uint32_t sec_count, ded_count;
6900
6901         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6902         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6903         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6904         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6905         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6906         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6907         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6908         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6909
6910         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6911                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6912                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6913
6914                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6915                 if (sec_count) {
6916                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6917                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6918                         err_data->ce_count += sec_count;
6919                 }
6920
6921                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6922                 if (ded_count) {
6923                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6924                                 "DED %d\n", i, vml2_mems[i], ded_count);
6925                         err_data->ue_count += ded_count;
6926                 }
6927         }
6928
6929         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6930                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6931                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6932
6933                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6934                                                 SEC_COUNT);
6935                 if (sec_count) {
6936                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6937                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6938                         err_data->ce_count += sec_count;
6939                 }
6940
6941                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6942                                                 DED_COUNT);
6943                 if (ded_count) {
6944                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6945                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6946                         err_data->ue_count += ded_count;
6947                 }
6948         }
6949
6950         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6951                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6952                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6953
6954                 sec_count = (data & 0x00006000L) >> 0xd;
6955                 if (sec_count) {
6956                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6957                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6958                                 sec_count);
6959                         err_data->ce_count += sec_count;
6960                 }
6961         }
6962
6963         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6964                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6965                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6966
6967                 sec_count = (data & 0x00006000L) >> 0xd;
6968                 if (sec_count) {
6969                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6970                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6971                                 sec_count);
6972                         err_data->ce_count += sec_count;
6973                 }
6974
6975                 ded_count = (data & 0x00018000L) >> 0xf;
6976                 if (ded_count) {
6977                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6978                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6979                                 ded_count);
6980                         err_data->ue_count += ded_count;
6981                 }
6982         }
6983
6984         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6985         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6986         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6987         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6988
6989         return 0;
6990 }
6991
6992 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6993         const struct soc15_reg_entry *reg,
6994         uint32_t se_id, uint32_t inst_id, uint32_t value,
6995         uint32_t *sec_count, uint32_t *ded_count)
6996 {
6997         uint32_t i;
6998         uint32_t sec_cnt, ded_cnt;
6999
7000         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
7001                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
7002                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
7003                         gfx_v9_0_ras_fields[i].inst != reg->inst)
7004                         continue;
7005
7006                 sec_cnt = (value &
7007                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
7008                                 gfx_v9_0_ras_fields[i].sec_count_shift;
7009                 if (sec_cnt) {
7010                         dev_info(adev->dev, "GFX SubBlock %s, "
7011                                 "Instance[%d][%d], SEC %d\n",
7012                                 gfx_v9_0_ras_fields[i].name,
7013                                 se_id, inst_id,
7014                                 sec_cnt);
7015                         *sec_count += sec_cnt;
7016                 }
7017
7018                 ded_cnt = (value &
7019                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
7020                                 gfx_v9_0_ras_fields[i].ded_count_shift;
7021                 if (ded_cnt) {
7022                         dev_info(adev->dev, "GFX SubBlock %s, "
7023                                 "Instance[%d][%d], DED %d\n",
7024                                 gfx_v9_0_ras_fields[i].name,
7025                                 se_id, inst_id,
7026                                 ded_cnt);
7027                         *ded_count += ded_cnt;
7028                 }
7029         }
7030
7031         return 0;
7032 }
7033
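/*
 * Clear all GFX EDC counters.  The counters are clear-on-read, so the reset
 * is done by selecting every SE/instance combination and reading each
 * counter register back, followed by the same treatment for the UTC (VML2 /
 * ATC L2) counters.
 */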
7034 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7035 {
7036         int i, j, k;
7037
7038         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7039                 return;
7040
7041         /* read back registers to clear the counters */
7042         mutex_lock(&adev->grbm_idx_mutex);
7043         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7044                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7045                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7046                                 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7047                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7048                         }
7049                 }
7050         }
7051         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7052         mutex_unlock(&adev->grbm_idx_mutex);
7053
7054         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7055         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7056         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7057         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7058         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7059         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7060         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7061         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7062
7063         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7064                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7065                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7066         }
7067
7068         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7069                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7070                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7071         }
7072
7073         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7074                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7075                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7076         }
7077
7078         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7079                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7080                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7081         }
7082
7083         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7084         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7085         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7086         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7087 }
7088
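/*
 * gfx_v9_0_query_ras_error_count - collect GFX correctable/uncorrectable errors
 *
 * Walks every EDC counter register per shader engine and instance,
 * accumulates SEC (correctable) counts into err_data->ce_count and DED
 * (uncorrectable) counts into err_data->ue_count, then appends the UTC
 * EDC status.
 */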
7089 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7090                                           void *ras_error_status)
7091 {
7092         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7093         uint32_t sec_count = 0, ded_count = 0;
7094         uint32_t i, j, k;
7095         uint32_t reg_value;
7096
7097         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7098                 return;
7099
7100         err_data->ue_count = 0;
7101         err_data->ce_count = 0;
7102
7103         mutex_lock(&adev->grbm_idx_mutex);
7104
7105         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7106                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7107                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7108                                 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7109                                 reg_value =
7110                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7111                                 if (reg_value)
7112                                         gfx_v9_0_ras_error_count(adev,
7113                                                 &gfx_v9_0_edc_counter_regs[i],
7114                                                 j, k, reg_value,
7115                                                 &sec_count, &ded_count);
7116                         }
7117                 }
7118         }
7119
7120         err_data->ce_count += sec_count;
7121         err_data->ue_count += ded_count;
7122
7123         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7124         mutex_unlock(&adev->grbm_idx_mutex);
7125
7126         gfx_v9_0_query_utc_edc_status(adev, err_data);
7127 }
7128
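/*
 * gfx_v9_0_emit_mem_sync - emit an ACQUIRE_MEM packet on the ring
 *
 * Invalidates the SH instruction and scalar caches as well as TCL1/TC over
 * the full address range, and writes back dirty TC lines, so that data
 * produced by earlier work is visible to subsequent operations.
 */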
7129 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7130 {
7131         const unsigned int cp_coher_cntl =
7132                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7133                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7134                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7135                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7136                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7137
7138         /* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
7139         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7140         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7141         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7142         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7143         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7144         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7145         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7146 }
7147
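/*
 * gfx_v9_0_emit_wave_limit_cs - limit waves on one compute (CS) pipe
 *
 * Emits a register write to mmSPI_WCL_PIPE_PERCENT_CS<pipe>: 0x1 while the
 * limit is enabled, or the hardware default value when it is disabled.
 */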
7148 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7149                                         uint32_t pipe, bool enable)
7150 {
7151         struct amdgpu_device *adev = ring->adev;
7152         uint32_t val;
7153         uint32_t wcl_cs_reg;
7154
7155         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
7156         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7157
7158         switch (pipe) {
7159         case 0:
7160                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7161                 break;
7162         case 1:
7163                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7164                 break;
7165         case 2:
7166                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7167                 break;
7168         case 3:
7169                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7170                 break;
7171         default:
7172                 DRM_DEBUG("invalid pipe %d\n", pipe);
7173                 return;
7174         }
7175
7176         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7177
7178 }
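
/*
 * gfx_v9_0_emit_wave_limit - throttle gfx and sibling compute pipes
 *
 * Used to improve QoS for high priority compute jobs: caps the gfx wave
 * multiplier via mmSPI_WCL_PIPE_PERCENT_GFX and restricts every CS pipe of
 * the first ME except the one this ring runs on.
 */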
7179 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7180 {
7181         struct amdgpu_device *adev = ring->adev;
7182         uint32_t val;
7183         int i;
7184
7186         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
7187          * limit the number of gfx waves. Writing 0x1f (lower 5 bits set)
7188          * ensures gfx only gets around 25% of the GPU resources.
7189          */
7190         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7191         amdgpu_ring_emit_wreg(ring,
7192                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7193                               val);
7194
7195         /* Restrict waves for normal/low priority compute queues as well
7196          * to get the best QoS for high priority compute jobs.
7197          *
7198          * amdgpu controls only the 1st ME (CS pipes 0-3).
7199          */
7200         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7201                 if (i != ring->pipe)
7202                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7203
7204         }
7205 }
7206
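/*
 * gfx_v9_ring_insert_nop - insert num_nop NOP dwords on the ring
 *
 * For more than one NOP, a single PACKET3_NOP header is emitted whose count
 * field covers up to 0x3ffe of the following filler dwords, and the
 * remaining num_nop - 1 dwords are then written as padding.
 */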
7207 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7208 {
7209         /* Header itself is a NOP packet */
7210         if (num_nop == 1) {
7211                 amdgpu_ring_write(ring, ring->funcs->nop);
7212                 return;
7213         }
7214
7215         /* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */
7216         amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7217
7218         /* Header is at index 0, followed by num_nop - 1 NOP packets */
7219         amdgpu_ring_insert_nop(ring, num_nop - 1);
7220 }
7221
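/*
 * gfx_v9_0_reset_kgq - reset a kernel gfx queue after a hang
 *
 * Requests a per-VMID reset by writing mmCP_VMID_RESET through the KIQ,
 * then re-emits the fence, waits for the reset request bits to clear and
 * runs a ring test to confirm the gfx ring is usable again.  Not supported
 * under SR-IOV.
 */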
7222 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7223 {
7224         struct amdgpu_device *adev = ring->adev;
7225         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7226         struct amdgpu_ring *kiq_ring = &kiq->ring;
7227         unsigned long flags;
7228         u32 tmp;
7229         int r;
7230
7231         if (amdgpu_sriov_vf(adev))
7232                 return -EINVAL;
7233
7234         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7235                 return -EINVAL;
7236
7237         spin_lock_irqsave(&kiq->ring_lock, flags);
7238
7239         if (amdgpu_ring_alloc(kiq_ring, 5)) {
7240                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7241                 return -ENOMEM;
7242         }
7243
7244         tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7245         gfx_v9_0_ring_emit_wreg(kiq_ring,
7246                                  SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7247         amdgpu_ring_commit(kiq_ring);
7248
7249         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7250
7251         r = amdgpu_ring_test_ring(kiq_ring);
7252         if (r)
7253                 return r;
7254
7255         if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7256                 return -ENOMEM;
7257         gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7258                                  ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7259         gfx_v9_0_ring_emit_reg_wait(ring,
7260                                     SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7261         gfx_v9_0_ring_emit_wreg(ring,
7262                                 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7263
7264         return amdgpu_ring_test_ring(ring);
7265 }
7266
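/*
 * gfx_v9_0_reset_kcq - reset a kernel compute queue after a hang
 *
 * Unmaps the queue through the KIQ, polls mmCP_HQD_ACTIVE until the HQD is
 * deactivated, reinitializes the MQD, remaps the queue through the KIQ and
 * finally tests the ring to confirm the queue is usable again.
 */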
7267 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7268                               unsigned int vmid)
7269 {
7270         struct amdgpu_device *adev = ring->adev;
7271         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7272         struct amdgpu_ring *kiq_ring = &kiq->ring;
7273         unsigned long flags;
7274         int i, r;
7275
7276         if (amdgpu_sriov_vf(adev))
7277                 return -EINVAL;
7278
7279         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7280                 return -EINVAL;
7281
7282         spin_lock_irqsave(&kiq->ring_lock, flags);
7283
7284         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7285                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7286                 return -ENOMEM;
7287         }
7288
7289         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7290                                    0, 0);
7291         amdgpu_ring_commit(kiq_ring);
7292
7293         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7294
7295         r = amdgpu_ring_test_ring(kiq_ring);
7296         if (r)
7297                 return r;
7298
7299         /* make sure the dequeue is complete */
7300         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7301         mutex_lock(&adev->srbm_mutex);
7302         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7303         for (i = 0; i < adev->usec_timeout; i++) {
7304                 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7305                         break;
7306                 udelay(1);
7307         }
7308         if (i >= adev->usec_timeout)
7309                 r = -ETIMEDOUT;
7310         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7311         mutex_unlock(&adev->srbm_mutex);
7312         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7313         if (r) {
7314                 dev_err(adev->dev, "failed to wait for HQD deactivation\n");
7315                 return r;
7316         }
7317
7318         r = amdgpu_bo_reserve(ring->mqd_obj, false);
7319         if (unlikely(r != 0)) {
7320                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
7321                 return r;
7322         }
7323         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7324         if (!r) {
7325                 r = gfx_v9_0_kcq_init_queue(ring, true);
7326                 amdgpu_bo_kunmap(ring->mqd_obj);
7327                 ring->mqd_ptr = NULL;
7328         }
7329         amdgpu_bo_unreserve(ring->mqd_obj);
7330         if (r) {
7331                 dev_err(adev->dev, "failed to reinit the kcq mqd\n");
7332                 return r;
7333         }
7334         spin_lock_irqsave(&kiq->ring_lock, flags);
7335         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7336         if (r) {
7337                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7338                 return -ENOMEM;
7339         }
7340         kiq->pmf->kiq_map_queues(kiq_ring, ring);
7341         amdgpu_ring_commit(kiq_ring);
7342         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7343         r = amdgpu_ring_test_ring(kiq_ring);
7344         if (r) {
7345                 DRM_ERROR("failed to remap the queue\n");
7346                 return r;
7347         }
7348         return amdgpu_ring_test_ring(ring);
7349 }
7350
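/*
 * gfx_v9_ip_print - print a previously captured GFX register dump
 *
 * Prints the core GC register snapshot and, when available, the per-queue
 * compute registers for every MEC/pipe/queue captured by gfx_v9_ip_dump().
 */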
7351 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7352 {
7353         struct amdgpu_device *adev = ip_block->adev;
7354         uint32_t i, j, k, reg, index = 0;
7355         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7356
7357         if (!adev->gfx.ip_dump_core)
7358                 return;
7359
7360         for (i = 0; i < reg_count; i++)
7361                 drm_printf(p, "%-50s \t 0x%08x\n",
7362                            gc_reg_list_9[i].reg_name,
7363                            adev->gfx.ip_dump_core[i]);
7364
7365         /* print compute queue registers for all instances */
7366         if (!adev->gfx.ip_dump_compute_queues)
7367                 return;
7368
7369         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7370         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7371                    adev->gfx.mec.num_mec,
7372                    adev->gfx.mec.num_pipe_per_mec,
7373                    adev->gfx.mec.num_queue_per_pipe);
7374
7375         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7376                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7377                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7378                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7379                                 for (reg = 0; reg < reg_count; reg++) {
7380                                         drm_printf(p, "%-50s \t 0x%08x\n",
7381                                                    gc_cp_reg_list_9[reg].reg_name,
7382                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
7383                                 }
7384                                 index += reg_count;
7385                         }
7386                 }
7387         }
7388
7389 }
7390
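/*
 * gfx_v9_ip_dump - capture GFX registers for later debugging
 *
 * Snapshots the core GC register list and the per-queue compute registers
 * for every MEC/pipe/queue, with GFXOFF temporarily disabled so the register
 * reads are valid.  The result is printed by gfx_v9_ip_print().
 */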
7391 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7392 {
7393         struct amdgpu_device *adev = ip_block->adev;
7394         uint32_t i, j, k, reg, index = 0;
7395         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7396
7397         if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7398                 return;
7399
7400         amdgpu_gfx_off_ctrl(adev, false);
7401         for (i = 0; i < reg_count; i++)
7402                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7403         amdgpu_gfx_off_ctrl(adev, true);
7404
7405         /* dump compute queue registers for all instances */
7406         if (!adev->gfx.ip_dump_compute_queues)
7407                 return;
7408
7409         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7410         amdgpu_gfx_off_ctrl(adev, false);
7411         mutex_lock(&adev->srbm_mutex);
7412         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7413                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7414                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7415                                 /* ME0 is for GFX so start from 1 for CP */
7416                                 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7417
7418                                 for (reg = 0; reg < reg_count; reg++) {
7419                                         adev->gfx.ip_dump_compute_queues[index + reg] =
7420                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
7421                                                         gc_cp_reg_list_9[reg]));
7422                                 }
7423                                 index += reg_count;
7424                         }
7425                 }
7426         }
7427         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7428         mutex_unlock(&adev->srbm_mutex);
7429         amdgpu_gfx_off_ctrl(adev, true);
7430
7431 }
7432
7433 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7434 {
7435         /* Emit the cleaner shader */
7436         amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7437         amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7438 }
7439
7440 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7441         .name = "gfx_v9_0",
7442         .early_init = gfx_v9_0_early_init,
7443         .late_init = gfx_v9_0_late_init,
7444         .sw_init = gfx_v9_0_sw_init,
7445         .sw_fini = gfx_v9_0_sw_fini,
7446         .hw_init = gfx_v9_0_hw_init,
7447         .hw_fini = gfx_v9_0_hw_fini,
7448         .suspend = gfx_v9_0_suspend,
7449         .resume = gfx_v9_0_resume,
7450         .is_idle = gfx_v9_0_is_idle,
7451         .wait_for_idle = gfx_v9_0_wait_for_idle,
7452         .soft_reset = gfx_v9_0_soft_reset,
7453         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7454         .set_powergating_state = gfx_v9_0_set_powergating_state,
7455         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7456         .dump_ip_state = gfx_v9_ip_dump,
7457         .print_ip_state = gfx_v9_ip_print,
7458 };
7459
7460 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7461         .type = AMDGPU_RING_TYPE_GFX,
7462         .align_mask = 0xff,
7463         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7464         .support_64bit_ptrs = true,
7465         .secure_submission_supported = true,
7466         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7467         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7468         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7469         .emit_frame_size = /* totally 242 maximum if 16 IBs */
7470                 5 +  /* COND_EXEC */
7471                 7 +  /* PIPELINE_SYNC */
7472                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7473                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7474                 2 + /* VM_FLUSH */
7475                 8 +  /* FENCE for VM_FLUSH */
7476                 20 + /* GDS switch */
7477                 4 + /* double SWITCH_BUFFER,
7478                      * the first COND_EXEC jumps to the place just
7479                      * prior to this double SWITCH_BUFFER */
7480                 5 + /* COND_EXEC */
7481                 7 +      /*     HDP_flush */
7482                 4 +      /*     VGT_flush */
7483                 14 + /* CE_META */
7484                 31 + /* DE_META */
7485                 3 + /* CNTX_CTRL */
7486                 5 + /* HDP_INVL */
7487                 8 + 8 + /* FENCE x2 */
7488                 2 + /* SWITCH_BUFFER */
7489                 7 + /* gfx_v9_0_emit_mem_sync */
7490                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7491         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7492         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7493         .emit_fence = gfx_v9_0_ring_emit_fence,
7494         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7495         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7496         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7497         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7498         .test_ring = gfx_v9_0_ring_test_ring,
7499         .insert_nop = gfx_v9_ring_insert_nop,
7500         .pad_ib = amdgpu_ring_generic_pad_ib,
7501         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7502         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7503         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7504         .preempt_ib = gfx_v9_0_ring_preempt_ib,
7505         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7506         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7507         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7508         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7509         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7510         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7511         .reset = gfx_v9_0_reset_kgq,
7512         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7513         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7514         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7515 };
7516
7517 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7518         .type = AMDGPU_RING_TYPE_GFX,
7519         .align_mask = 0xff,
7520         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7521         .support_64bit_ptrs = true,
7522         .secure_submission_supported = true,
7523         .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7524         .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7525         .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7526         .emit_frame_size = /* totally 242 maximum if 16 IBs */
7527                 5 +  /* COND_EXEC */
7528                 7 +  /* PIPELINE_SYNC */
7529                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7530                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7531                 2 + /* VM_FLUSH */
7532                 8 +  /* FENCE for VM_FLUSH */
7533                 20 + /* GDS switch */
7534                 4 + /* double SWITCH_BUFFER,
7535                      * the first COND_EXEC jumps to the place just
7536                      * prior to this double SWITCH_BUFFER
7537                      */
7538                 5 + /* COND_EXEC */
7539                 7 +      /*     HDP_flush */
7540                 4 +      /*     VGT_flush */
7541                 14 + /* CE_META */
7542                 31 + /* DE_META */
7543                 3 + /* CNTX_CTRL */
7544                 5 + /* HDP_INVL */
7545                 8 + 8 + /* FENCE x2 */
7546                 2 + /* SWITCH_BUFFER */
7547                 7 + /* gfx_v9_0_emit_mem_sync */
7548                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7549         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7550         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7551         .emit_fence = gfx_v9_0_ring_emit_fence,
7552         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7553         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7554         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7555         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7556         .test_ring = gfx_v9_0_ring_test_ring,
7557         .test_ib = gfx_v9_0_ring_test_ib,
7558         .insert_nop = gfx_v9_ring_insert_nop,
7559         .pad_ib = amdgpu_ring_generic_pad_ib,
7560         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7561         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7562         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7563         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7564         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7565         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7566         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7567         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7568         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7569         .patch_cntl = gfx_v9_0_ring_patch_cntl,
7570         .patch_de = gfx_v9_0_ring_patch_de_meta,
7571         .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7572         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7573         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7574         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7575 };
7576
7577 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7578         .type = AMDGPU_RING_TYPE_COMPUTE,
7579         .align_mask = 0xff,
7580         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7581         .support_64bit_ptrs = true,
7582         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7583         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7584         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7585         .emit_frame_size =
7586                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7587                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7588                 5 + /* hdp invalidate */
7589                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7590                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7591                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7592                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7593                 7 + /* gfx_v9_0_emit_mem_sync */
7594                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7595                 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7596                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7597         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7598         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7599         .emit_fence = gfx_v9_0_ring_emit_fence,
7600         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7601         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7602         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7603         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7604         .test_ring = gfx_v9_0_ring_test_ring,
7605         .test_ib = gfx_v9_0_ring_test_ib,
7606         .insert_nop = gfx_v9_ring_insert_nop,
7607         .pad_ib = amdgpu_ring_generic_pad_ib,
7608         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7609         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7610         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7611         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7612         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7613         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7614         .reset = gfx_v9_0_reset_kcq,
7615         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7616         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7617         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7618 };
7619
7620 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7621         .type = AMDGPU_RING_TYPE_KIQ,
7622         .align_mask = 0xff,
7623         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7624         .support_64bit_ptrs = true,
7625         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7626         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7627         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7628         .emit_frame_size =
7629                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7630                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7631                 5 + /* hdp invalidate */
7632                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7633                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7634                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7635                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7636         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7637         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7638         .test_ring = gfx_v9_0_ring_test_ring,
7639         .insert_nop = amdgpu_ring_insert_nop,
7640         .pad_ib = amdgpu_ring_generic_pad_ib,
7641         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7642         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7643         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7644         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7645 };
7646
7647 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7648 {
7649         int i;
7650
7651         adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7652
7653         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7654                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7655
7656         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7657                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7658                         adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7659         }
7660
7661         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7662                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7663 }
7664
7665 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7666         .set = gfx_v9_0_set_eop_interrupt_state,
7667         .process = gfx_v9_0_eop_irq,
7668 };
7669
7670 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7671         .set = gfx_v9_0_set_priv_reg_fault_state,
7672         .process = gfx_v9_0_priv_reg_irq,
7673 };
7674
7675 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7676         .set = gfx_v9_0_set_bad_op_fault_state,
7677         .process = gfx_v9_0_bad_op_irq,
7678 };
7679
7680 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7681         .set = gfx_v9_0_set_priv_inst_fault_state,
7682         .process = gfx_v9_0_priv_inst_irq,
7683 };
7684
7685 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7686         .set = gfx_v9_0_set_cp_ecc_error_state,
7687         .process = amdgpu_gfx_cp_ecc_error_irq,
7688 };
7689
7691 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7692 {
7693         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7694         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7695
7696         adev->gfx.priv_reg_irq.num_types = 1;
7697         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7698
7699         adev->gfx.bad_op_irq.num_types = 1;
7700         adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7701
7702         adev->gfx.priv_inst_irq.num_types = 1;
7703         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7704
7705         adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7706         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7707 }
7708
7709 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7710 {
7711         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7712         case IP_VERSION(9, 0, 1):
7713         case IP_VERSION(9, 2, 1):
7714         case IP_VERSION(9, 4, 0):
7715         case IP_VERSION(9, 2, 2):
7716         case IP_VERSION(9, 1, 0):
7717         case IP_VERSION(9, 4, 1):
7718         case IP_VERSION(9, 3, 0):
7719         case IP_VERSION(9, 4, 2):
7720                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7721                 break;
7722         default:
7723                 break;
7724         }
7725 }
7726
7727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7728 {
7729         /* init ASIC GDS info */
7730         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7731         case IP_VERSION(9, 0, 1):
7732         case IP_VERSION(9, 2, 1):
7733         case IP_VERSION(9, 4, 0):
7734                 adev->gds.gds_size = 0x10000;
7735                 break;
7736         case IP_VERSION(9, 2, 2):
7737         case IP_VERSION(9, 1, 0):
7738         case IP_VERSION(9, 4, 1):
7739                 adev->gds.gds_size = 0x1000;
7740                 break;
7741         case IP_VERSION(9, 4, 2):
7742                 /* Aldebaran removed all the GDS internal memory;
7743                  * the kernel only supports GWS opcodes such as
7744                  * barrier and semaphore. */
7745                 adev->gds.gds_size = 0;
7746                 break;
7747         default:
7748                 adev->gds.gds_size = 0x10000;
7749                 break;
7750         }
7751
7752         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7753         case IP_VERSION(9, 0, 1):
7754         case IP_VERSION(9, 4, 0):
7755                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7756                 break;
7757         case IP_VERSION(9, 2, 1):
7758                 adev->gds.gds_compute_max_wave_id = 0x27f;
7759                 break;
7760         case IP_VERSION(9, 2, 2):
7761         case IP_VERSION(9, 1, 0):
7762                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7763                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7764                 else
7765                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7766                 break;
7767         case IP_VERSION(9, 4, 1):
7768                 adev->gds.gds_compute_max_wave_id = 0xfff;
7769                 break;
7770         case IP_VERSION(9, 4, 2):
7771                 /* deprecated for Aldebaran, no usage at all */
7772                 adev->gds.gds_compute_max_wave_id = 0;
7773                 break;
7774         default:
7775                 /* this really depends on the chip */
7776                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7777                 break;
7778         }
7779
7780         adev->gds.gws_size = 64;
7781         adev->gds.oa_size = 16;
7782 }
7783
7784 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7785                                                  u32 bitmap)
7786 {
7787         u32 data;
7788
7789         if (!bitmap)
7790                 return;
7791
7792         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7793         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7794
7795         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7796 }
7797
7798 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7799 {
7800         u32 data, mask;
7801
7802         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7803         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7804
7805         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7806         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7807
7808         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7809
7810         return (~data) & mask;
7811 }
7812
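/*
 * gfx_v9_0_get_cu_info - report the active/always-on CU configuration
 *
 * Walks every shader engine and shader array, applies the user-requested
 * CU disable masks, and fills cu_info with the per-SE/SH active CU bitmaps,
 * the always-on CU mask and the total number of active CUs.
 */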
7813 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7814                                  struct amdgpu_cu_info *cu_info)
7815 {
7816         int i, j, k, counter, active_cu_number = 0;
7817         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7818         unsigned disable_masks[4 * 4];
7819
7820         if (!adev || !cu_info)
7821                 return -EINVAL;
7822
7823         /*
7824          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
7825          */
7826         if (adev->gfx.config.max_shader_engines *
7827                 adev->gfx.config.max_sh_per_se > 16)
7828                 return -EINVAL;
7829
7830         amdgpu_gfx_parse_disable_cu(disable_masks,
7831                                     adev->gfx.config.max_shader_engines,
7832                                     adev->gfx.config.max_sh_per_se);
7833
7834         mutex_lock(&adev->grbm_idx_mutex);
7835         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7836                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7837                         mask = 1;
7838                         ao_bitmap = 0;
7839                         counter = 0;
7840                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7841                         gfx_v9_0_set_user_cu_inactive_bitmap(
7842                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7843                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7844
7845                         /*
7846                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
7847                          * is a 4x4 array, which usually suits Vega ASICs with
7848                          * their 4*2 SE/SH layout.
7849                          * But for Arcturus, the SE/SH layout changed to 8*1.
7850                          * To minimize the impact, we keep it compatible with
7851                          * the current bitmap array as below:
7852                          *    SE4,SH0 --> bitmap[0][1]
7853                          *    SE5,SH0 --> bitmap[1][1]
7854                          *    SE6,SH0 --> bitmap[2][1]
7855                          *    SE7,SH0 --> bitmap[3][1]
7856                          */
7857                         cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7858
7859                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7860                                 if (bitmap & mask) {
7861                                         if (counter < adev->gfx.config.max_cu_per_sh)
7862                                                 ao_bitmap |= mask;
7863                                         counter++;
7864                                 }
7865                                 mask <<= 1;
7866                         }
7867                         active_cu_number += counter;
7868                         if (i < 2 && j < 2)
7869                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7870                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7871                 }
7872         }
7873         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7874         mutex_unlock(&adev->grbm_idx_mutex);
7875
7876         cu_info->number = active_cu_number;
7877         cu_info->ao_cu_mask = ao_cu_mask;
7878         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7879
7880         return 0;
7881 }
7882
7883 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7884 {
7885         .type = AMD_IP_BLOCK_TYPE_GFX,
7886         .major = 9,
7887         .minor = 0,
7888         .rev = 0,
7889         .funcs = &gfx_v9_0_ip_funcs,
7890 };