linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68
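/*
 * Firmware images requested at runtime for each supported ASIC (CE, PFP, ME,
 * MEC, MEC2 and RLC microcode).  MODULE_FIRMWARE() records the dependency so
 * firmware packaging tools can pick these files up automatically.
 */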
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134
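/*
 * Register offsets below are not part of the generic gc_9_0 headers included
 * above; they are defined locally for Arcturus (TCP_CHAN_STEER_*_ARCT) and
 * Renoir (GOLDEN_TSC_COUNT_*_Renoir).
 */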
135 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
137 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
139 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
141 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
143 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
145 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
147
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152
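/*
 * GC status and command-processor registers captured when dumping GFX9 IP
 * state (for example after a GPU hang): GRBM/CP status, ring-buffer and
 * indirect-buffer pointers, UTCL1 and protection-fault status, and the CP
 * header dump registers.
 */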
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157         SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160         SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164         SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176         SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192         SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193         SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194         SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195         SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197         SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199         SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200         SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202         SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204         SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205         SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206         SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207         SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209         SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210         SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223         SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227         SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228         /* cp header registers */
229         SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231         SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232         SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233         SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234         /* SE status registers */
235         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238         SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240
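/*
 * Per-queue HQD (hardware queue descriptor) registers dumped for each
 * compute queue.
 */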
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242         /* compute queue registers */
243         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279         SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281
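/*
 * GFX RAS sub-block indices as understood by the RAS TA (trusted
 * application).  The *_INDEX_START/*_INDEX_END markers bracket contiguous
 * ranges so a sub-block can be addressed as a base index plus an offset.
 */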
282 enum ta_ras_gfx_subblock {
283         /*CPC*/
284         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286         TA_RAS_BLOCK__GFX_CPC_UCODE,
287         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294         /* CPF*/
295         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298         TA_RAS_BLOCK__GFX_CPF_TAG,
299         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300         /* CPG*/
301         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304         TA_RAS_BLOCK__GFX_CPG_TAG,
305         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306         /* GDS*/
307         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314         /* SPI*/
315         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316         /* SQ*/
317         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319         TA_RAS_BLOCK__GFX_SQ_LDS_D,
320         TA_RAS_BLOCK__GFX_SQ_LDS_I,
321         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323         /* SQC (3 ranges)*/
324         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325         /* SQC range 0*/
326         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337         /* SQC range 1*/
338         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351         /* SQC range 2*/
352         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366         /* TA*/
367         TA_RAS_BLOCK__GFX_TA_INDEX_START,
368         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374         /* TCA*/
375         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379         /* TCC (5 sub-ranges)*/
380         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381         /* TCC range 0*/
382         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392         /* TCC range 1*/
393         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398         /* TCC range 2*/
399         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410         /* TCC range 3*/
411         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416         /* TCC range 4*/
417         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424         /* TCI*/
425         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426         /* TCP*/
427         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436         /* TD*/
437         TA_RAS_BLOCK__GFX_TD_INDEX_START,
438         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442         /* EA (3 sub-ranges)*/
443         TA_RAS_BLOCK__GFX_EA_INDEX_START,
444         /* EA range 0*/
445         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455         /* EA range 1*/
456         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465         /* EA range 2*/
466         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473         /* UTC VM L2 bank*/
474         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475         /* UTC VM walker*/
476         TA_RAS_BLOCK__UTC_VML2_WALKER,
477         /* UTC ATC L2 2MB cache*/
478         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479         /* UTC ATC L2 4KB cache*/
480         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481         TA_RAS_BLOCK__GFX_MAX
482 };
483
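/*
 * Describes one GFX RAS sub-block: its name, the matching ta_ras_gfx_subblock
 * index, and bitmasks of the error types supported by the hardware and by the
 * driver.  The AMDGPU_RAS_SUB_BLOCK() macro below packs its a..d arguments
 * into hw_supported_error_type (bits 0..3) and e..h into
 * sw_supported_error_type (note the ordering: g -> bit 0, e -> bit 1,
 * h -> bit 2, f -> bit 3).
 */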
484 struct ras_gfx_subblock {
485         unsigned char *name;
486         int ta_subblock;
487         int hw_supported_error_type;
488         int sw_supported_error_type;
489 };
490
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493                 #subblock,                                                     \
494                 TA_RAS_BLOCK__##subblock,                                      \
495                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497         }
498
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517                              0),
518         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519                              0),
520         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528                              0, 0),
529         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530                              0),
531         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532                              0, 0),
533         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534                              0),
535         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536                              0, 0),
537         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538                              0),
539         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540                              1),
541         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542                              0, 0, 0),
543         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544                              0),
545         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546                              0),
547         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548                              0),
549         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550                              0),
551         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552                              0),
553         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554                              0, 0),
555         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556                              0),
557         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558                              0),
559         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560                              0, 0, 0),
561         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562                              0),
563         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564                              0),
565         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566                              0),
567         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568                              0),
569         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570                              0),
571         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572                              0, 0),
573         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574                              0),
575         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584                              1),
585         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586                              1),
587         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588                              1),
589         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590                              0),
591         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592                              0),
593         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605                              0),
606         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608                              0),
609         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610                              0, 0),
611         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612                              0),
613         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648
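/*
 * Golden register settings: each entry gives a register, a mask of the bits
 * to update and the value to program into them, applied during hardware
 * init.  Common gc_9_x settings come first, followed by per-ASIC variants.
 */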
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846
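/*
 * Registers that may be accessed indirectly through the RLCG interface
 * (used, for example, when direct MMIO access is restricted under SR-IOV).
 */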
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848         {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849         {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851
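/*
 * Offsets of the eight RLC SRM INDEX_CNTL address/data register pairs,
 * expressed relative to instance 0 so they can be indexed in a loop.
 */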
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886                                 struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891                                           void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893                                      void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896                                               unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899
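/*
 * KIQ packet helpers: the functions below build PM4 packets on the kernel
 * interface queue (KIQ) ring so the CP firmware can manage the other queues.
 *
 * SET_RESOURCES hands the compute queue mask and the cleaner shader address
 * to the CP.
 */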
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901                                 uint64_t queue_mask)
902 {
903         struct amdgpu_device *adev = kiq_ring->adev;
904         u64 shader_mc_addr;
905
906         /* Cleaner shader MC address */
907         shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908
909         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910         amdgpu_ring_write(kiq_ring,
911                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
912                 /* vmid_mask:0, queue_type:0 (KIQ) */
913                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914         amdgpu_ring_write(kiq_ring,
915                         lower_32_bits(queue_mask));     /* queue mask lo */
916         amdgpu_ring_write(kiq_ring,
917                         upper_32_bits(queue_mask));     /* queue mask hi */
918         amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919         amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
921         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
922 }
923
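/*
 * MAP_QUEUES asks the KIQ to map a ring's MQD to a hardware queue slot;
 * engine_sel is 4 for the gfx engine and 0 for compute.
 */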
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925                                  struct amdgpu_ring *ring)
926 {
927         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928         uint64_t wptr_addr = ring->wptr_gpu_addr;
929         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930
931         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939                          /*queue_type: normal compute queue */
940                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941                          /* alloc format: all_on_one_pipe */
942                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944                          /* num_queues: must be 1 */
945                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946         amdgpu_ring_write(kiq_ring,
947                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953
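/*
 * UNMAP_QUEUES tears a queue down again; with PREEMPT_QUEUES_NO_UNMAP the
 * current write pointer is passed so the queue can be preempted and resumed
 * later.
 */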
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955                                    struct amdgpu_ring *ring,
956                                    enum amdgpu_unmap_queues_action action,
957                                    u64 gpu_addr, u64 seq)
958 {
959         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960
961         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962         amdgpu_ring_write(kiq_ring, /* action, queue_sel, engine_sel, num_queues */
963                           PACKET3_UNMAP_QUEUES_ACTION(action) |
964                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967         amdgpu_ring_write(kiq_ring,
968                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969
970         if (action == PREEMPT_QUEUES_NO_UNMAP) {
971                 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972                 amdgpu_ring_write(kiq_ring, 0);
973                 amdgpu_ring_write(kiq_ring, 0);
974
975         } else {
976                 amdgpu_ring_write(kiq_ring, 0);
977                 amdgpu_ring_write(kiq_ring, 0);
978                 amdgpu_ring_write(kiq_ring, 0);
979         }
980 }
981
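/* Emit a QUERY_STATUS packet for the given ring; the KIQ writes the fence
 * value @seq to @addr once the query completes, so the caller can wait on it.
 */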
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983                                    struct amdgpu_ring *ring,
984                                    u64 addr,
985                                    u64 seq)
986 {
987         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988
989         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990         amdgpu_ring_write(kiq_ring,
991                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993                           PACKET3_QUERY_STATUS_COMMAND(2));
994         /* doorbell offset and engine select */
995         amdgpu_ring_write(kiq_ring,
996                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003
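/* Emit an INVALIDATE_TLBS packet to flush GPU TLB entries for @pasid;
 * @all_hub requests the invalidation on every VM hub and @flush_type selects
 * the flush granularity.
 */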
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005                                 uint16_t pasid, uint32_t flush_type,
1006                                 bool all_hub)
1007 {
1008         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009         amdgpu_ring_write(kiq_ring,
1010                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015
1016
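/* Reset a hung hardware queue directly through GRBM-indexed registers:
 * select the queue, request a dequeue plus a compute queue reset, then poll
 * CP_HQD_ACTIVE until the HQD goes idle. Only compute queues are handled.
 */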
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018                                         uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019                                         uint32_t xcc_id, uint32_t vmid)
1020 {
1021         struct amdgpu_device *adev = kiq_ring->adev;
1022         unsigned i;
1023
1024         /* enter safe mode */
1025         amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026         mutex_lock(&adev->srbm_mutex);
1027         soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028
1029         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031                 WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032                 /* wait until the dequeue request takes effect */
1033                 for (i = 0; i < adev->usec_timeout; i++) {
1034                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035                                 break;
1036                         udelay(1);
1037                 }
1038                 if (i >= adev->usec_timeout)
1039                         dev_err(adev->dev, "failed to wait for HQD to deactivate\n");
1040         } else {
1041                 dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042         }
1043
1044         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045         mutex_unlock(&adev->srbm_mutex);
1046         /* exit safe mode */
1047         amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049
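/* KIQ PM4 packet helpers for this generation; the *_size fields are the
 * packet sizes in dwords used to reserve ring space.
 */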
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054         .kiq_query_status = gfx_v9_0_kiq_query_status,
1055         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056         .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057         .set_resources_size = 8,
1058         .map_queues_size = 7,
1059         .unmap_queues_size = 6,
1060         .query_status_size = 7,
1061         .invalidate_tlbs_size = 2,
1062 };
1063
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066         adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072         case IP_VERSION(9, 0, 1):
1073                 soc15_program_register_sequence(adev,
1074                                                 golden_settings_gc_9_0,
1075                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1076                 soc15_program_register_sequence(adev,
1077                                                 golden_settings_gc_9_0_vg10,
1078                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079                 break;
1080         case IP_VERSION(9, 2, 1):
1081                 soc15_program_register_sequence(adev,
1082                                                 golden_settings_gc_9_2_1,
1083                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
1084                 soc15_program_register_sequence(adev,
1085                                                 golden_settings_gc_9_2_1_vg12,
1086                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087                 break;
1088         case IP_VERSION(9, 4, 0):
1089                 soc15_program_register_sequence(adev,
1090                                                 golden_settings_gc_9_0,
1091                                                 ARRAY_SIZE(golden_settings_gc_9_0));
1092                 soc15_program_register_sequence(adev,
1093                                                 golden_settings_gc_9_0_vg20,
1094                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095                 break;
1096         case IP_VERSION(9, 4, 1):
1097                 soc15_program_register_sequence(adev,
1098                                                 golden_settings_gc_9_4_1_arct,
1099                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100                 break;
1101         case IP_VERSION(9, 2, 2):
1102         case IP_VERSION(9, 1, 0):
1103                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104                                                 ARRAY_SIZE(golden_settings_gc_9_1));
1105                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106                         soc15_program_register_sequence(adev,
1107                                                         golden_settings_gc_9_1_rv2,
1108                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109                 else
1110                         soc15_program_register_sequence(adev,
1111                                                         golden_settings_gc_9_1_rv1,
1112                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113                 break;
1114         case IP_VERSION(9, 3, 0):
1115                 soc15_program_register_sequence(adev,
1116                                                 golden_settings_gc_9_1_rn,
1117                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118                 return; /* Renoir does not need the common golden settings */
1119         case IP_VERSION(9, 4, 2):
1120                 gfx_v9_4_2_init_golden_registers(adev,
1121                                                  adev->smuio.funcs->get_die_id(adev));
1122                 break;
1123         default:
1124                 break;
1125         }
1126
1127         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128             (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132
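/* Emit a WRITE_DATA packet that writes @val to register @reg; @wc requests
 * a write confirmation.
 */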
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134                                        bool wc, uint32_t reg, uint32_t val)
1135 {
1136         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138                                 WRITE_DATA_DST_SEL(0) |
1139                                 (wc ? WR_CONFIRM : 0));
1140         amdgpu_ring_write(ring, reg);
1141         amdgpu_ring_write(ring, 0);
1142         amdgpu_ring_write(ring, val);
1143 }
1144
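/* Emit a WAIT_REG_MEM packet that polls a register (mem_space 0) or a memory
 * location (mem_space 1) until (value & @mask) == @ref, using the "equal"
 * compare function; @inv is the poll interval.
 */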
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146                                   int mem_space, int opt, uint32_t addr0,
1147                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1148                                   uint32_t inv)
1149 {
1150         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151         amdgpu_ring_write(ring,
1152                                  /* memory (1) or register (0) */
1153                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1157
1158         if (mem_space)
1159                 BUG_ON(addr0 & 0x3); /* Dword align */
1160         amdgpu_ring_write(ring, addr0);
1161         amdgpu_ring_write(ring, addr1);
1162         amdgpu_ring_write(ring, ref);
1163         amdgpu_ring_write(ring, mask);
1164         amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166
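/* Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring with a
 * SET_UCONFIG_REG packet and poll the register until the value shows up or
 * the timeout expires.
 */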
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169         struct amdgpu_device *adev = ring->adev;
1170         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171         uint32_t tmp = 0;
1172         unsigned i;
1173         int r;
1174
1175         WREG32(scratch, 0xCAFEDEAD);
1176         r = amdgpu_ring_alloc(ring, 3);
1177         if (r)
1178                 return r;
1179
1180         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181         amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182         amdgpu_ring_write(ring, 0xDEADBEEF);
1183         amdgpu_ring_commit(ring);
1184
1185         for (i = 0; i < adev->usec_timeout; i++) {
1186                 tmp = RREG32(scratch);
1187                 if (tmp == 0xDEADBEEF)
1188                         break;
1189                 udelay(1);
1190         }
1191
1192         if (i >= adev->usec_timeout)
1193                 r = -ETIMEDOUT;
1194         return r;
1195 }
1196
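/* IB test: allocate a writeback slot, submit a small IB whose WRITE_DATA
 * packet stores 0xDEADBEEF into that slot, then wait on the fence and check
 * that the value landed.
 */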
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199         struct amdgpu_device *adev = ring->adev;
1200         struct amdgpu_ib ib;
1201         struct dma_fence *f = NULL;
1202
1203         unsigned index;
1204         uint64_t gpu_addr;
1205         uint32_t tmp;
1206         long r;
1207
1208         r = amdgpu_device_wb_get(adev, &index);
1209         if (r)
1210                 return r;
1211
1212         gpu_addr = adev->wb.gpu_addr + (index * 4);
1213         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214         memset(&ib, 0, sizeof(ib));
1215
1216         r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217         if (r)
1218                 goto err1;
1219
1220         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222         ib.ptr[2] = lower_32_bits(gpu_addr);
1223         ib.ptr[3] = upper_32_bits(gpu_addr);
1224         ib.ptr[4] = 0xDEADBEEF;
1225         ib.length_dw = 5;
1226
1227         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228         if (r)
1229                 goto err2;
1230
1231         r = dma_fence_wait_timeout(f, false, timeout);
1232         if (r == 0) {
1233                 r = -ETIMEDOUT;
1234                 goto err2;
1235         } else if (r < 0) {
1236                 goto err2;
1237         }
1238
1239         tmp = adev->wb.wb[index];
1240         if (tmp == 0xDEADBEEF)
1241                 r = 0;
1242         else
1243                 r = -EINVAL;
1244
1245 err2:
1246         amdgpu_ib_free(&ib, NULL);
1247         dma_fence_put(f);
1248 err1:
1249         amdgpu_device_wb_free(adev, index);
1250         return r;
1251 }
1252
1253
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256         amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257         amdgpu_ucode_release(&adev->gfx.me_fw);
1258         amdgpu_ucode_release(&adev->gfx.ce_fw);
1259         amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260         amdgpu_ucode_release(&adev->gfx.mec_fw);
1261         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262
1263         kfree(adev->gfx.rlc.register_list_format);
1264 }
1265
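/* Decide, per engine, whether the CP firmware is new enough to set the
 * me/mec_fw_write_wait capability flags, and warn once when the firmware
 * is too old.
 */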
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268         adev->gfx.me_fw_write_wait = false;
1269         adev->gfx.mec_fw_write_wait = false;
1270
1271         if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273              (adev->gfx.mec_feature_version < 46) ||
1274              (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275              (adev->gfx.pfp_feature_version < 46)))
1276                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1277
1278         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279         case IP_VERSION(9, 0, 1):
1280                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281                     (adev->gfx.me_feature_version >= 42) &&
1282                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283                     (adev->gfx.pfp_feature_version >= 42))
1284                         adev->gfx.me_fw_write_wait = true;
1285
1286                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287                     (adev->gfx.mec_feature_version >= 42))
1288                         adev->gfx.mec_fw_write_wait = true;
1289                 break;
1290         case IP_VERSION(9, 2, 1):
1291                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292                     (adev->gfx.me_feature_version >= 44) &&
1293                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294                     (adev->gfx.pfp_feature_version >= 44))
1295                         adev->gfx.me_fw_write_wait = true;
1296
1297                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298                     (adev->gfx.mec_feature_version >= 44))
1299                         adev->gfx.mec_fw_write_wait = true;
1300                 break;
1301         case IP_VERSION(9, 4, 0):
1302                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303                     (adev->gfx.me_feature_version >= 44) &&
1304                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305                     (adev->gfx.pfp_feature_version >= 44))
1306                         adev->gfx.me_fw_write_wait = true;
1307
1308                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309                     (adev->gfx.mec_feature_version >= 44))
1310                         adev->gfx.mec_fw_write_wait = true;
1311                 break;
1312         case IP_VERSION(9, 1, 0):
1313         case IP_VERSION(9, 2, 2):
1314                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315                     (adev->gfx.me_feature_version >= 42) &&
1316                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317                     (adev->gfx.pfp_feature_version >= 42))
1318                         adev->gfx.me_fw_write_wait = true;
1319
1320                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321                     (adev->gfx.mec_feature_version >= 42))
1322                         adev->gfx.mec_fw_write_wait = true;
1323                 break;
1324         default:
1325                 adev->gfx.me_fw_write_wait = true;
1326                 adev->gfx.mec_fw_write_wait = true;
1327                 break;
1328         }
1329 }
1330
1331 struct amdgpu_gfxoff_quirk {
1332         u16 chip_vendor;
1333         u16 chip_device;
1334         u16 subsys_vendor;
1335         u16 subsys_device;
1336         u8 revision;
1337 };
1338
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346         /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347         { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348         /* https://bbs.openkylin.top/t/topic/171497 */
1349         { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350         /* HP 705G4 DM with R5 2400G */
1351         { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352         { 0, 0, 0, 0, 0 },
1353 };
1354
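/* Return true when the device matches an entry in the quirk list above,
 * i.e. a board on which GFXOFF is known to be problematic and gets disabled.
 */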
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358
1359         while (p && p->chip_device != 0) {
1360                 if (pdev->vendor == p->chip_vendor &&
1361                     pdev->device == p->chip_device &&
1362                     pdev->subsystem_vendor == p->subsys_vendor &&
1363                     pdev->subsystem_device == p->subsys_device &&
1364                     pdev->revision == p->revision) {
1365                         return true;
1366                 }
1367                 ++p;
1368         }
1369         return false;
1370 }
1371
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374         if (adev->pm.fw_version >= 0x41e2b)
1375                 return true;
1376         else
1377                 return false;
1378 }
1379
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383             (adev->gfx.me_fw_version >= 0x000000a5) &&
1384             (adev->gfx.me_feature_version >= 52))
1385                 return true;
1386         else
1387                 return false;
1388 }
1389
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394
1395         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396         case IP_VERSION(9, 0, 1):
1397         case IP_VERSION(9, 2, 1):
1398         case IP_VERSION(9, 4, 0):
1399                 break;
1400         case IP_VERSION(9, 2, 2):
1401         case IP_VERSION(9, 1, 0):
1402                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404                     ((!is_raven_kicker(adev) &&
1405                       adev->gfx.rlc_fw_version < 531) ||
1406                      (adev->gfx.rlc_feature_version < 1) ||
1407                      !adev->gfx.rlc.is_rlc_v2_1))
1408                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409
1410                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412                                 AMD_PG_SUPPORT_CP |
1413                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1414                 break;
1415         case IP_VERSION(9, 3, 0):
1416                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418                                 AMD_PG_SUPPORT_CP |
1419                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1420                 break;
1421         default:
1422                 break;
1423         }
1424 }
1425
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427                                           char *chip_name)
1428 {
1429         int err;
1430
1431         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432                                    AMDGPU_UCODE_REQUIRED,
1433                                    "amdgpu/%s_pfp.bin", chip_name);
1434         if (err)
1435                 goto out;
1436         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1437
1438         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1439                                    AMDGPU_UCODE_REQUIRED,
1440                                    "amdgpu/%s_me.bin", chip_name);
1441         if (err)
1442                 goto out;
1443         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1444
1445         err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1446                                    AMDGPU_UCODE_REQUIRED,
1447                                    "amdgpu/%s_ce.bin", chip_name);
1448         if (err)
1449                 goto out;
1450         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1451
1452 out:
1453         if (err) {
1454                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1455                 amdgpu_ucode_release(&adev->gfx.me_fw);
1456                 amdgpu_ucode_release(&adev->gfx.ce_fw);
1457         }
1458         return err;
1459 }
1460
1461 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1462                                        char *chip_name)
1463 {
1464         int err;
1465         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1466         uint16_t version_major;
1467         uint16_t version_minor;
1468         uint32_t smu_version;
1469
1470         /*
1471          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1472          * instead of picasso_rlc.bin.
1473          * Detection:
1474          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1475          *          or revision >= 0xD8 && revision <= 0xDF
1476          * otherwise the part is PCO FP5
1477          */
1478         if (!strcmp(chip_name, "picasso") &&
1479                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1480                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1481                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1482                                            AMDGPU_UCODE_REQUIRED,
1483                                            "amdgpu/%s_rlc_am4.bin", chip_name);
1484         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1485                 (smu_version >= 0x41e2b))
1486                 /* The SMC is loaded by the SBIOS on APUs, so the SMU
1487                  * version can be queried directly.
1488                  */
1489                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1490                                            AMDGPU_UCODE_REQUIRED,
1491                                            "amdgpu/%s_kicker_rlc.bin", chip_name);
1492         else
1493                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1494                                            AMDGPU_UCODE_REQUIRED,
1495                                            "amdgpu/%s_rlc.bin", chip_name);
1496         if (err)
1497                 goto out;
1498
1499         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1500         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1501         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1502         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1503 out:
1504         if (err)
1505                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1506
1507         return err;
1508 }
1509
1510 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1511 {
1512         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1513             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1514             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1515                 return false;
1516
1517         return true;
1518 }
1519
1520 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1521                                               char *chip_name)
1522 {
1523         int err;
1524
1525         if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1526                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1527                                    AMDGPU_UCODE_REQUIRED,
1528                                    "amdgpu/%s_sjt_mec.bin", chip_name);
1529         else
1530                 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1531                                            AMDGPU_UCODE_REQUIRED,
1532                                            "amdgpu/%s_mec.bin", chip_name);
1533         if (err)
1534                 goto out;
1535
1536         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1537         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1538
1539         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1540                 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1541                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1542                                                    AMDGPU_UCODE_REQUIRED,
1543                                                    "amdgpu/%s_sjt_mec2.bin", chip_name);
1544                 else
1545                         err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1546                                                    AMDGPU_UCODE_REQUIRED,
1547                                                    "amdgpu/%s_mec2.bin", chip_name);
1548                 if (!err) {
1549                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1550                         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1551                 } else {
1552                         err = 0;
1553                         amdgpu_ucode_release(&adev->gfx.mec2_fw);
1554                 }
1555         } else {
1556                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1557                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1558         }
1559
1560         gfx_v9_0_check_if_need_gfxoff(adev);
1561         gfx_v9_0_check_fw_write_wait(adev);
1562
1563 out:
1564         if (err)
1565                 amdgpu_ucode_release(&adev->gfx.mec_fw);
1566         return err;
1567 }
1568
1569 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1570 {
1571         char ucode_prefix[30];
1572         int r;
1573
1574         DRM_DEBUG("\n");
1575         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1576
1577         /* No CPG in Arcturus */
1578         if (adev->gfx.num_gfx_rings) {
1579                 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1580                 if (r)
1581                         return r;
1582         }
1583
1584         r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1585         if (r)
1586                 return r;
1587
1588         r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1589         if (r)
1590                 return r;
1591
1592         return r;
1593 }
1594
1595 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1596 {
1597         u32 count = 0;
1598         const struct cs_section_def *sect = NULL;
1599         const struct cs_extent_def *ext = NULL;
1600
1601         /* begin clear state */
1602         count += 2;
1603         /* context control state */
1604         count += 3;
1605
1606         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1607                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1608                         if (sect->id == SECT_CONTEXT)
1609                                 count += 2 + ext->reg_count;
1610                         else
1611                                 return 0;
1612                 }
1613         }
1614
1615         /* end clear state */
1616         count += 2;
1617         /* clear state */
1618         count += 2;
1619
1620         return count;
1621 }
1622
1623 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1624                                     volatile u32 *buffer)
1625 {
1626         u32 count = 0, i;
1627         const struct cs_section_def *sect = NULL;
1628         const struct cs_extent_def *ext = NULL;
1629
1630         if (adev->gfx.rlc.cs_data == NULL)
1631                 return;
1632         if (buffer == NULL)
1633                 return;
1634
1635         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1636         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1637
1638         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1639         buffer[count++] = cpu_to_le32(0x80000000);
1640         buffer[count++] = cpu_to_le32(0x80000000);
1641
1642         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1643                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1644                         if (sect->id == SECT_CONTEXT) {
1645                                 buffer[count++] =
1646                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1647                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1648                                                 PACKET3_SET_CONTEXT_REG_START);
1649                                 for (i = 0; i < ext->reg_count; i++)
1650                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1651                         } else {
1652                                 return;
1653                         }
1654                 }
1655         }
1656
1657         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1658         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1659
1660         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1661         buffer[count++] = cpu_to_le32(0);
1662 }
1663
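/* Program the always-on CU masks: for each SE/SH keep the first
 * always_on_cu_num CUs from the bitmap powered, and also write the first
 * pg_always_on_cu_num of them to RLC_PG_ALWAYS_ON_CU_MASK.
 */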
1664 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1665 {
1666         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1667         uint32_t pg_always_on_cu_num = 2;
1668         uint32_t always_on_cu_num;
1669         uint32_t i, j, k;
1670         uint32_t mask, cu_bitmap, counter;
1671
1672         if (adev->flags & AMD_IS_APU)
1673                 always_on_cu_num = 4;
1674         else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1675                 always_on_cu_num = 8;
1676         else
1677                 always_on_cu_num = 12;
1678
1679         mutex_lock(&adev->grbm_idx_mutex);
1680         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1681                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1682                         mask = 1;
1683                         cu_bitmap = 0;
1684                         counter = 0;
1685                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1686
1687                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1688                                 if (cu_info->bitmap[0][i][j] & mask) {
1689                                         if (counter == pg_always_on_cu_num)
1690                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1691                                         if (counter < always_on_cu_num)
1692                                                 cu_bitmap |= mask;
1693                                         else
1694                                                 break;
1695                                         counter++;
1696                                 }
1697                                 mask <<= 1;
1698                         }
1699
1700                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1701                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1702                 }
1703         }
1704         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1705         mutex_unlock(&adev->grbm_idx_mutex);
1706 }
1707
1708 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1709 {
1710         uint32_t data;
1711
1712         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1715         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1716         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1717
1718         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1719         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1720
1721         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1722         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1723
1724         mutex_lock(&adev->grbm_idx_mutex);
1725         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1726         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1727         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1728
1729         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1730         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1731         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1732         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1733         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1734
1735         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1736         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1737         data &= 0x0000FFFF;
1738         data |= 0x00C00000;
1739         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1740
1741         /*
1742          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1743          * programmed in gfx_v9_0_init_always_on_cu_mask()
1744          */
1745
1746         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1747          * but is used here for RLC_LB_CNTL configuration */
1748         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1749         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1750         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1751         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1752         mutex_unlock(&adev->grbm_idx_mutex);
1753
1754         gfx_v9_0_init_always_on_cu_mask(adev);
1755 }
1756
1757 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1758 {
1759         uint32_t data;
1760
1761         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1764         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1765         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1766
1767         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1768         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1769
1770         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1771         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1772
1773         mutex_lock(&adev->grbm_idx_mutex);
1774         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1775         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1777
1778         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1779         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1780         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1781         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1782         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1783
1784         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1785         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1786         data &= 0x0000FFFF;
1787         data |= 0x00C00000;
1788         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1789
1790         /*
1791          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1792          * programmed in gfx_v9_0_init_always_on_cu_mask()
1793          */
1794
1795         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1796          * but is used here for RLC_LB_CNTL configuration */
1797         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1798         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1799         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1800         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1801         mutex_unlock(&adev->grbm_idx_mutex);
1802
1803         gfx_v9_0_init_always_on_cu_mask(adev);
1804 }
1805
1806 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1807 {
1808         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1809 }
1810
1811 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1812 {
1813         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1814                 return 5;
1815         else
1816                 return 4;
1817 }
1818
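/* Record the scratch, GRBM and spare-interrupt register offsets that the
 * generic RLCG register access helpers use for indirect register writes.
 */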
1819 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1820 {
1821         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1822
1823         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1824         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1825         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1826         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1827         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1828         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1829         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1830         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1831         adev->gfx.rlc.rlcg_reg_access_supported = true;
1832 }
1833
1834 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1835 {
1836         const struct cs_section_def *cs_data;
1837         int r;
1838
1839         adev->gfx.rlc.cs_data = gfx9_cs_data;
1840
1841         cs_data = adev->gfx.rlc.cs_data;
1842
1843         if (cs_data) {
1844                 /* init clear state block */
1845                 r = amdgpu_gfx_rlc_init_csb(adev);
1846                 if (r)
1847                         return r;
1848         }
1849
1850         if (adev->flags & AMD_IS_APU) {
1851                 /* TODO: double check the cp_table_size for RV */
1852                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1853                 r = amdgpu_gfx_rlc_init_cpt(adev);
1854                 if (r)
1855                         return r;
1856         }
1857
1858         return 0;
1859 }
1860
1861 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1862 {
1863         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1864         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1865 }
1866
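/* Allocate the MEC HPD EOP buffer (one GFX9_MEC_HPD_SIZE slot per compute
 * ring) and a GTT buffer holding a copy of the MEC firmware image.
 */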
1867 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1868 {
1869         int r;
1870         u32 *hpd;
1871         const __le32 *fw_data;
1872         unsigned fw_size;
1873         u32 *fw;
1874         size_t mec_hpd_size;
1875
1876         const struct gfx_firmware_header_v1_0 *mec_hdr;
1877
1878         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1879
1880         /* take ownership of the relevant compute queues */
1881         amdgpu_gfx_compute_queue_acquire(adev);
1882         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1883         if (mec_hpd_size) {
1884                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1885                                               AMDGPU_GEM_DOMAIN_VRAM |
1886                                               AMDGPU_GEM_DOMAIN_GTT,
1887                                               &adev->gfx.mec.hpd_eop_obj,
1888                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1889                                               (void **)&hpd);
1890                 if (r) {
1891                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1892                         gfx_v9_0_mec_fini(adev);
1893                         return r;
1894                 }
1895
1896                 memset(hpd, 0, mec_hpd_size);
1897
1898                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1899                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1900         }
1901
1902         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1903
1904         fw_data = (const __le32 *)
1905                 (adev->gfx.mec_fw->data +
1906                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1907         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1908
1909         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1910                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1911                                       &adev->gfx.mec.mec_fw_obj,
1912                                       &adev->gfx.mec.mec_fw_gpu_addr,
1913                                       (void **)&fw);
1914         if (r) {
1915                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1916                 gfx_v9_0_mec_fini(adev);
1917                 return r;
1918         }
1919
1920         memcpy(fw, fw_data, fw_size);
1921
1922         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1923         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1924
1925         return 0;
1926 }
1927
1928 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1929 {
1930         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1931                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1932                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1933                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1934                 (SQ_IND_INDEX__FORCE_READ_MASK));
1935         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1936 }
1937
1938 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1939                            uint32_t wave, uint32_t thread,
1940                            uint32_t regno, uint32_t num, uint32_t *out)
1941 {
1942         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1943                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1944                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1945                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1946                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1947                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1948                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1949         while (num--)
1950                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1951 }
1952
1953 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1954 {
1955         /* type 1 wave data */
1956         dst[(*no_fields)++] = 1;
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1960         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1961         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1962         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1963         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1964         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1965         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1966         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1967         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1968         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1969         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1970         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1971         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1972 }
1973
1974 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1975                                      uint32_t wave, uint32_t start,
1976                                      uint32_t size, uint32_t *dst)
1977 {
1978         wave_read_regs(
1979                 adev, simd, wave, 0,
1980                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1981 }
1982
1983 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1984                                      uint32_t wave, uint32_t thread,
1985                                      uint32_t start, uint32_t size,
1986                                      uint32_t *dst)
1987 {
1988         wave_read_regs(
1989                 adev, simd, wave, thread,
1990                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1991 }
1992
1993 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1994                                   u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1995 {
1996         soc15_grbm_select(adev, me, pipe, q, vm, 0);
1997 }
1998
1999 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2000         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2001         .select_se_sh = &gfx_v9_0_select_se_sh,
2002         .read_wave_data = &gfx_v9_0_read_wave_data,
2003         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2004         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2005         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2006 };
2007
2008 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
2009                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2010                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2011                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2012 };
2013
2014 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2015         .ras_block = {
2016                 .hw_ops = &gfx_v9_0_ras_ops,
2017         },
2018 };
2019
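/* Early per-IP-version gfx configuration: FIFO sizes, the GB_ADDR_CONFIG
 * golden value (either a constant or read from hardware and patched), RAS
 * block hookup, then decode GB_ADDR_CONFIG into the gb_addr_config_fields.
 */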
2020 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2021 {
2022         u32 gb_addr_config;
2023         int err;
2024
2025         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2026         case IP_VERSION(9, 0, 1):
2027                 adev->gfx.config.max_hw_contexts = 8;
2028                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2029                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2030                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2031                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2032                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2033                 break;
2034         case IP_VERSION(9, 2, 1):
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2041                 DRM_INFO("fix gfx.config for vega12\n");
2042                 break;
2043         case IP_VERSION(9, 4, 0):
2044                 adev->gfx.ras = &gfx_v9_0_ras;
2045                 adev->gfx.config.max_hw_contexts = 8;
2046                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2047                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2048                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2049                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2050                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2051                 gb_addr_config &= ~0xf3e777ff;
2052                 gb_addr_config |= 0x22014042;
2053                 /* check vbios table if gpu info is not available */
2054                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2055                 if (err)
2056                         return err;
2057                 break;
2058         case IP_VERSION(9, 2, 2):
2059         case IP_VERSION(9, 1, 0):
2060                 adev->gfx.config.max_hw_contexts = 8;
2061                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2062                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2063                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2064                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2065                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2066                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2067                 else
2068                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2069                 break;
2070         case IP_VERSION(9, 4, 1):
2071                 adev->gfx.ras = &gfx_v9_4_ras;
2072                 adev->gfx.config.max_hw_contexts = 8;
2073                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2076                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078                 gb_addr_config &= ~0xf3e777ff;
2079                 gb_addr_config |= 0x22014042;
2080                 break;
2081         case IP_VERSION(9, 3, 0):
2082                 adev->gfx.config.max_hw_contexts = 8;
2083                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2084                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2085                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2086                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2087                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2088                 gb_addr_config &= ~0xf3e777ff;
2089                 gb_addr_config |= 0x22010042;
2090                 break;
2091         case IP_VERSION(9, 4, 2):
2092                 adev->gfx.ras = &gfx_v9_4_2_ras;
2093                 adev->gfx.config.max_hw_contexts = 8;
2094                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2095                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2096                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2097                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2098                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2099                 gb_addr_config &= ~0xf3e777ff;
2100                 gb_addr_config |= 0x22014042;
2101                 /* check vbios table if gpu info is not available */
2102                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2103                 if (err)
2104                         return err;
2105                 break;
2106         default:
2107                 BUG();
2108                 break;
2109         }
2110
2111         adev->gfx.config.gb_addr_config = gb_addr_config;
2112
2113         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2114                         REG_GET_FIELD(
2115                                         adev->gfx.config.gb_addr_config,
2116                                         GB_ADDR_CONFIG,
2117                                         NUM_PIPES);
2118
2119         adev->gfx.config.max_tile_pipes =
2120                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2121
2122         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2123                         REG_GET_FIELD(
2124                                         adev->gfx.config.gb_addr_config,
2125                                         GB_ADDR_CONFIG,
2126                                         NUM_BANKS);
2127         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2128                         REG_GET_FIELD(
2129                                         adev->gfx.config.gb_addr_config,
2130                                         GB_ADDR_CONFIG,
2131                                         MAX_COMPRESSED_FRAGS);
2132         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2133                         REG_GET_FIELD(
2134                                         adev->gfx.config.gb_addr_config,
2135                                         GB_ADDR_CONFIG,
2136                                         NUM_RB_PER_SE);
2137         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2138                         REG_GET_FIELD(
2139                                         adev->gfx.config.gb_addr_config,
2140                                         GB_ADDR_CONFIG,
2141                                         NUM_SHADER_ENGINES);
2142         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2143                         REG_GET_FIELD(
2144                                         adev->gfx.config.gb_addr_config,
2145                                         GB_ADDR_CONFIG,
2146                                         PIPE_INTERLEAVE_SIZE));
2147
2148         return 0;
2149 }
2150
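/* Set up one software compute ring: map it onto MEC/pipe/queue, assign its
 * doorbell and EOP address, and register it with the EOP interrupt source
 * at a priority derived from the queue.
 */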
2151 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2152                                       int mec, int pipe, int queue)
2153 {
2154         unsigned irq_type;
2155         struct amdgpu_ring *ring;
2156         unsigned int hw_prio;
2157
2158         ring = &adev->gfx.compute_ring[ring_id];
2159
2160         /* mec0 is me1 */
2161         ring->me = mec + 1;
2162         ring->pipe = pipe;
2163         ring->queue = queue;
2164
2165         ring->ring_obj = NULL;
2166         ring->use_doorbell = true;
2167         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2168         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2169                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2170         ring->vm_hub = AMDGPU_GFXHUB(0);
2171         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2172
2173         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2174                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2175                 + ring->pipe;
2176         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2177                         AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2178         /* type-2 packets are deprecated on MEC, use type-3 instead */
2179         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2180                                 hw_prio, NULL);
2181 }
2182
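/* Allocate the buffers used for the GFX IP register dump: one array for the
 * core GC registers and one covering the compute queue registers across all
 * MEC/pipe/queue instances.
 */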
2183 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2184 {
2185         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2186         uint32_t *ptr;
2187         uint32_t inst;
2188
2189         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2190         if (!ptr) {
2191                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2192                 adev->gfx.ip_dump_core = NULL;
2193         } else {
2194                 adev->gfx.ip_dump_core = ptr;
2195         }
2196
2197         /* Allocate memory for compute queue registers for all the instances */
2198         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2199         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2200                 adev->gfx.mec.num_queue_per_pipe;
2201
2202         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2203         if (!ptr) {
2204                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2205                 adev->gfx.ip_dump_compute_queues = NULL;
2206         } else {
2207                 adev->gfx.ip_dump_compute_queues = ptr;
2208         }
2209 }
2210
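/*
 * Software-side GFX init: pick the MEC count and cleaner shader per GC IP
 * version, register the CP interrupt sources, create the RLC and MEC
 * objects, then bring up the hardware gfx ring, the optional MCBP software
 * rings and their muxer, the compute rings (allocated across pipes), KIQ,
 * the MQD backing store, and finally RAS, IP-dump and sysfs support.
 */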
2211 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2212 {
2213         int i, j, k, r, ring_id;
2214         int xcc_id = 0;
2215         struct amdgpu_ring *ring;
2216         struct amdgpu_device *adev = ip_block->adev;
2217         unsigned int hw_prio;
2218
2219         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2220         case IP_VERSION(9, 0, 1):
2221         case IP_VERSION(9, 2, 1):
2222         case IP_VERSION(9, 4, 0):
2223         case IP_VERSION(9, 2, 2):
2224         case IP_VERSION(9, 1, 0):
2225         case IP_VERSION(9, 4, 1):
2226         case IP_VERSION(9, 3, 0):
2227         case IP_VERSION(9, 4, 2):
2228                 adev->gfx.mec.num_mec = 2;
2229                 break;
2230         default:
2231                 adev->gfx.mec.num_mec = 1;
2232                 break;
2233         }
2234
2235         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2236         case IP_VERSION(9, 4, 2):
2237                 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2238                 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2239                 if (adev->gfx.mec_fw_version >= 88) {
2240                         adev->gfx.enable_cleaner_shader = true;
2241                         r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2242                         if (r) {
2243                                 adev->gfx.enable_cleaner_shader = false;
2244                                 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2245                         }
2246                 }
2247                 break;
2248         default:
2249                 adev->gfx.enable_cleaner_shader = false;
2250                 break;
2251         }
2252
2253         adev->gfx.mec.num_pipe_per_mec = 4;
2254         adev->gfx.mec.num_queue_per_pipe = 8;
2255
2256         /* EOP Event */
2257         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2258         if (r)
2259                 return r;
2260
2261         /* Bad opcode Event */
2262         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2263                               GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2264                               &adev->gfx.bad_op_irq);
2265         if (r)
2266                 return r;
2267
2268         /* Privileged reg */
2269         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2270                               &adev->gfx.priv_reg_irq);
2271         if (r)
2272                 return r;
2273
2274         /* Privileged inst */
2275         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2276                               &adev->gfx.priv_inst_irq);
2277         if (r)
2278                 return r;
2279
2280         /* ECC error */
2281         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2282                               &adev->gfx.cp_ecc_error_irq);
2283         if (r)
2284                 return r;
2285
2286         /* FUE error */
2287         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2288                               &adev->gfx.cp_ecc_error_irq);
2289         if (r)
2290                 return r;
2291
2292         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2293
2294         if (adev->gfx.rlc.funcs) {
2295                 if (adev->gfx.rlc.funcs->init) {
2296                         r = adev->gfx.rlc.funcs->init(adev);
2297                         if (r) {
2298                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2299                                 return r;
2300                         }
2301                 }
2302         }
2303
2304         r = gfx_v9_0_mec_init(adev);
2305         if (r) {
2306                 DRM_ERROR("Failed to init MEC BOs!\n");
2307                 return r;
2308         }
2309
2310         /* set up the gfx ring */
2311         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2312                 ring = &adev->gfx.gfx_ring[i];
2313                 ring->ring_obj = NULL;
2314                 if (!i)
2315                         sprintf(ring->name, "gfx");
2316                 else
2317                         sprintf(ring->name, "gfx_%d", i);
2318                 ring->use_doorbell = true;
2319                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2320
2321                 /* disable scheduler on the real ring */
2322                 ring->no_scheduler = adev->gfx.mcbp;
2323                 ring->vm_hub = AMDGPU_GFXHUB(0);
2324                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2325                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2326                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2327                 if (r)
2328                         return r;
2329         }
2330
2331         /* set up the software rings */
2332         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2333                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2334                         ring = &adev->gfx.sw_gfx_ring[i];
2335                         ring->ring_obj = NULL;
2336                         sprintf(ring->name, amdgpu_sw_ring_name(i));
2337                         ring->use_doorbell = true;
2338                         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2339                         ring->is_sw_ring = true;
2340                         hw_prio = amdgpu_sw_ring_priority(i);
2341                         ring->vm_hub = AMDGPU_GFXHUB(0);
2342                         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2343                                              AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2344                                              NULL);
2345                         if (r)
2346                                 return r;
2347                         ring->wptr = 0;
2348                 }
2349
2350                 /* init the muxer and add software rings */
2351                 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2352                                          GFX9_NUM_SW_GFX_RINGS);
2353                 if (r) {
2354                         DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2355                         return r;
2356                 }
2357                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2358                         r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2359                                                         &adev->gfx.sw_gfx_ring[i]);
2360                         if (r) {
2361                                 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2362                                 return r;
2363                         }
2364                 }
2365         }
2366
2367         /* set up the compute queues - allocate horizontally across pipes */
2368         ring_id = 0;
2369         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2370                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2371                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2372                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2373                                                                      k, j))
2374                                         continue;
2375
2376                                 r = gfx_v9_0_compute_ring_init(adev,
2377                                                                ring_id,
2378                                                                i, k, j);
2379                                 if (r)
2380                                         return r;
2381
2382                                 ring_id++;
2383                         }
2384                 }
2385         }
2386
2387         /* TODO: Add queue reset mask when FW fully supports it */
2388         adev->gfx.gfx_supported_reset =
2389                 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2390         adev->gfx.compute_supported_reset =
2391                 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2392
2393         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2394         if (r) {
2395                 DRM_ERROR("Failed to init KIQ BOs!\n");
2396                 return r;
2397         }
2398
2399         r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2400         if (r)
2401                 return r;
2402
2403         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2404         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2405         if (r)
2406                 return r;
2407
2408         adev->gfx.ce_ram_size = 0x8000;
2409
2410         r = gfx_v9_0_gpu_early_init(adev);
2411         if (r)
2412                 return r;
2413
2414         if (amdgpu_gfx_ras_sw_init(adev)) {
2415                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2416                 return -EINVAL;
2417         }
2418
2419         gfx_v9_0_alloc_ip_dump(adev);
2420
2421         r = amdgpu_gfx_sysfs_init(adev);
2422         if (r)
2423                 return r;
2424
2425         return 0;
2426 }
2427
2428
2429 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2430 {
2431         int i;
2432         struct amdgpu_device *adev = ip_block->adev;
2433
2434         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2435                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2436                         amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2437                 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2438         }
2439
2440         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2441                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2442         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2443                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2444
2445         amdgpu_gfx_mqd_sw_fini(adev, 0);
2446         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2447         amdgpu_gfx_kiq_fini(adev, 0);
2448
2449         amdgpu_gfx_cleaner_shader_sw_fini(adev);
2450
2451         gfx_v9_0_mec_fini(adev);
2452         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2453                                 &adev->gfx.rlc.clear_state_gpu_addr,
2454                                 (void **)&adev->gfx.rlc.cs_ptr);
2455         if (adev->flags & AMD_IS_APU) {
2456                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2457                                 &adev->gfx.rlc.cp_table_gpu_addr,
2458                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2459         }
2460         gfx_v9_0_free_microcode(adev);
2461
2462         amdgpu_gfx_sysfs_fini(adev);
2463
2464         kfree(adev->gfx.ip_dump_core);
2465         kfree(adev->gfx.ip_dump_compute_queues);
2466
2467         return 0;
2468 }
2469
2470
2471 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2472 {
2473         /* TODO */
2474 }
2475
2476 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2477                            u32 instance, int xcc_id)
2478 {
2479         u32 data;
2480
2481         if (instance == 0xffffffff)
2482                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2483         else
2484                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2485
2486         if (se_num == 0xffffffff)
2487                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2488         else
2489                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2490
2491         if (sh_num == 0xffffffff)
2492                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2493         else
2494                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2495
2496         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2497 }
2498
2499 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2500 {
2501         u32 data, mask;
2502
2503         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2504         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2505
2506         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2507         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2508
2509         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2510                                          adev->gfx.config.max_sh_per_se);
2511
2512         return (~data) & mask;
2513 }
2514
2515 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2516 {
2517         int i, j;
2518         u32 data;
2519         u32 active_rbs = 0;
2520         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2521                                         adev->gfx.config.max_sh_per_se;
2522
2523         mutex_lock(&adev->grbm_idx_mutex);
2524         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2525                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2526                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2527                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2528                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2529                                                rb_bitmap_width_per_sh);
2530                 }
2531         }
2532         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2533         mutex_unlock(&adev->grbm_idx_mutex);
2534
2535         adev->gfx.config.backend_enable_mask = active_rbs;
2536         adev->gfx.config.num_rbs = hweight32(active_rbs);
2537 }
2538
2539 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2540                                 uint32_t first_vmid,
2541                                 uint32_t last_vmid)
2542 {
2543         uint32_t data;
2544         uint32_t trap_config_vmid_mask = 0;
2545         int i;
2546
2547         /* Calculate trap config vmid mask */
2548         for (i = first_vmid; i < last_vmid; i++)
2549                 trap_config_vmid_mask |= (1 << i);
2550
2551         data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2552                         VMID_SEL, trap_config_vmid_mask);
2553         data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2554                         TRAP_EN, 1);
2555         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2556         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2557
2558         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2559         WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2560 }
2561
2562 #define DEFAULT_SH_MEM_BASES    (0x6000)
2563 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2564 {
2565         int i;
2566         uint32_t sh_mem_config;
2567         uint32_t sh_mem_bases;
2568
2569         /*
2570          * Configure apertures:
2571          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2572          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2573          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2574          */
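        /*
         * 0x6000 supplies bits 63:48 of an aperture base (see the >> 48
         * programming in gfx_v9_0_constants_init()); writing it into both
         * the low (PRIVATE_BASE) and high (SHARED_BASE) halves of
         * SH_MEM_BASES places the private and shared apertures in the
         * 0x60000000'00000000 range described above.
         */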
2575         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2576
2577         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2578                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2579                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2580
2581         mutex_lock(&adev->srbm_mutex);
2582         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2583                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2584                 /* CP and shaders */
2585                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2586                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2587         }
2588         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2589         mutex_unlock(&adev->srbm_mutex);
2590
2591         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2592            access. These should be enabled by FW for target VMIDs. */
2593         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2594                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2595                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2596                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2597                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2598         }
2599 }
2600
2601 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2602 {
2603         int vmid;
2604
2605         /*
2606          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2607          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2608          * the driver can enable them for graphics. VMID0 should maintain
2609          * access so that HWS firmware can save/restore entries.
2610          */
2611         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2612                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2613                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2614                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2615                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2616         }
2617 }
2618
2619 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2620 {
2621         uint32_t tmp;
2622
2623         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2624         case IP_VERSION(9, 4, 1):
2625                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2626                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2627                                 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2628                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2629                 break;
2630         default:
2631                 break;
2632         }
2633 }
2634
2635 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2636 {
2637         u32 tmp;
2638         int i;
2639
2640         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2641
2642         gfx_v9_0_tiling_mode_table_init(adev);
2643
2644         if (adev->gfx.num_gfx_rings)
2645                 gfx_v9_0_setup_rb(adev);
2646         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2647         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2648
2649         /* XXX SH_MEM regs */
2650         /* where to put LDS, scratch, GPUVM in FSA64 space */
2651         mutex_lock(&adev->srbm_mutex);
2652         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2653                 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2654                 /* CP and shaders */
2655                 if (i == 0) {
2656                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2657                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2658                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2659                                             !!adev->gmc.noretry);
2660                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2661                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2662                 } else {
2663                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2664                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2665                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2666                                             !!adev->gmc.noretry);
2667                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2668                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2669                                 (adev->gmc.private_aperture_start >> 48));
2670                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2671                                 (adev->gmc.shared_aperture_start >> 48));
2672                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2673                 }
2674         }
2675         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2676
2677         mutex_unlock(&adev->srbm_mutex);
2678
2679         gfx_v9_0_init_compute_vmid(adev);
2680         gfx_v9_0_init_gds_vmid(adev);
2681         gfx_v9_0_init_sq_config(adev);
2682 }
2683
2684 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2685 {
2686         u32 i, j, k;
2687         u32 mask;
2688
2689         mutex_lock(&adev->grbm_idx_mutex);
2690         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2691                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2692                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2693                         for (k = 0; k < adev->usec_timeout; k++) {
2694                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2695                                         break;
2696                                 udelay(1);
2697                         }
2698                         if (k == adev->usec_timeout) {
2699                                 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2700                                                       0xffffffff, 0xffffffff, 0);
2701                                 mutex_unlock(&adev->grbm_idx_mutex);
2702                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2703                                          i, j);
2704                                 return;
2705                         }
2706                 }
2707         }
2708         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2709         mutex_unlock(&adev->grbm_idx_mutex);
2710
2711         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2712                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2713                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2714                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2715         for (k = 0; k < adev->usec_timeout; k++) {
2716                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2717                         break;
2718                 udelay(1);
2719         }
2720 }
2721
2722 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2723                                                bool enable)
2724 {
2725         u32 tmp;
2726
2727         /* These interrupts should be enabled to drive DS clock */
2728
2729         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2730
2731         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2732         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2733         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2734         if (adev->gfx.num_gfx_rings)
2735                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2736
2737         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2738 }
2739
2740 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2741 {
2742         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2743         /* csib */
2744         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2745                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2746         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2747                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2748         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2749                         adev->gfx.rlc.clear_state_size);
2750 }
2751
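/*
 * Walk the RLC register_list_format blob starting at indirect_offset.  Each
 * indirect block is a run of three-dword entries terminated by 0xFFFFFFFF,
 * with the indirect register offset in the last dword of each entry; the
 * loop records where every block starts and collects each distinct
 * indirect register into unique_indirect_regs.
 */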
2752 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2753                                 int indirect_offset,
2754                                 int list_size,
2755                                 int *unique_indirect_regs,
2756                                 int unique_indirect_reg_count,
2757                                 int *indirect_start_offsets,
2758                                 int *indirect_start_offsets_count,
2759                                 int max_start_offsets_count)
2760 {
2761         int idx;
2762
2763         for (; indirect_offset < list_size; indirect_offset++) {
2764                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2765                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2766                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2767
2768                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2769                         indirect_offset += 2;
2770
2771                         /* look for the matching index */
2772                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2773                                 if (unique_indirect_regs[idx] ==
2774                                         register_list_format[indirect_offset] ||
2775                                         !unique_indirect_regs[idx])
2776                                         break;
2777                         }
2778
2779                         BUG_ON(idx >= unique_indirect_reg_count);
2780
2781                         if (!unique_indirect_regs[idx])
2782                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2783
2784                         indirect_offset++;
2785                 }
2786         }
2787 }
2788
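/*
 * Program the RLC save/restore machinery from the firmware-provided lists:
 * find the unique indirect registers and per-block start offsets in the
 * format list, copy the register_restore table into SRM ARAM, stream the
 * direct and indirect portions of the format list plus the list size and
 * starting offsets into GPM scratch, and program the index/data control
 * registers for each unique indirect register.
 */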
2789 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2790 {
2791         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2792         int unique_indirect_reg_count = 0;
2793
2794         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2795         int indirect_start_offsets_count = 0;
2796
2797         int list_size = 0;
2798         int i = 0, j = 0;
2799         u32 tmp = 0;
2800
2801         u32 *register_list_format =
2802                 kmemdup(adev->gfx.rlc.register_list_format,
2803                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2804         if (!register_list_format)
2805                 return -ENOMEM;
2806
2807         /* setup unique_indirect_regs array and indirect_start_offsets array */
2808         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2809         gfx_v9_1_parse_ind_reg_list(register_list_format,
2810                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2811                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2812                                     unique_indirect_regs,
2813                                     unique_indirect_reg_count,
2814                                     indirect_start_offsets,
2815                                     &indirect_start_offsets_count,
2816                                     ARRAY_SIZE(indirect_start_offsets));
2817
2818         /* enable auto inc in case it is disabled */
2819         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2820         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2821         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2822
2823         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2824         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2825                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2826         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2827                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2828                         adev->gfx.rlc.register_restore[i]);
2829
2830         /* load indirect register */
2831         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2832                 adev->gfx.rlc.reg_list_format_start);
2833
2834         /* direct register portion */
2835         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2836                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2837                         register_list_format[i]);
2838
2839         /* indirect register portion */
2840         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2841                 if (register_list_format[i] == 0xFFFFFFFF) {
2842                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2843                         continue;
2844                 }
2845
2846                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2847                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2848
2849                 for (j = 0; j < unique_indirect_reg_count; j++) {
2850                         if (register_list_format[i] == unique_indirect_regs[j]) {
2851                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2852                                 break;
2853                         }
2854                 }
2855
2856                 BUG_ON(j >= unique_indirect_reg_count);
2857
2858                 i++;
2859         }
2860
2861         /* set save/restore list size */
2862         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2863         list_size = list_size >> 1;
2864         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2865                 adev->gfx.rlc.reg_restore_list_size);
2866         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2867
2868         /* write the starting offsets to RLC scratch ram */
2869         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2870                 adev->gfx.rlc.starting_offsets_start);
2871         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2872                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2873                        indirect_start_offsets[i]);
2874
2875         /* load unique indirect regs */
2876         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2877                 if (unique_indirect_regs[i] != 0) {
2878                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2879                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2880                                unique_indirect_regs[i] & 0x3FFFF);
2881
2882                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2883                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2884                                unique_indirect_regs[i] >> 20);
2885                 }
2886         }
2887
2888         kfree(register_list_format);
2889         return 0;
2890 }
2891
2892 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2893 {
2894         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2895 }
2896
2897 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2898                                              bool enable)
2899 {
2900         uint32_t data = 0;
2901         uint32_t default_data = 0;
2902
2903         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2904         if (enable) {
2905                 /* enable GFXIP control over CGPG */
2906                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2907                 if (default_data != data)
2908                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2909
2910                 /* update status */
2911                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2912                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2913                 if (default_data != data)
2914                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2915         } else {
2916                 /* restore GFXIP control over CGPG */
2917                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2918                 if (default_data != data)
2919                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2920         }
2921 }
2922
2923 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2924 {
2925         uint32_t data = 0;
2926
2927         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2928                               AMD_PG_SUPPORT_GFX_SMG |
2929                               AMD_PG_SUPPORT_GFX_DMG)) {
2930                 /* init IDLE_POLL_COUNT = 60 */
2931                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2932                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2933                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2934                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2935
2936                 /* init RLC PG Delay */
2937                 data = 0;
2938                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2939                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2940                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2941                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2942                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2943
2944                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2945                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2946                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2947                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2948
2949                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2950                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2951                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2952                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2953
2954                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2955                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2956
2957                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2958                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2959                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2960                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2961                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2962         }
2963 }
2964
2965 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2966                                                 bool enable)
2967 {
2968         uint32_t data = 0;
2969         uint32_t default_data = 0;
2970
2971         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2972         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2973                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2974                              enable ? 1 : 0);
2975         if (default_data != data)
2976                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2977 }
2978
2979 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2980                                                 bool enable)
2981 {
2982         uint32_t data = 0;
2983         uint32_t default_data = 0;
2984
2985         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2986         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2987                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2988                              enable ? 1 : 0);
2989         if (default_data != data)
2990                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2991 }
2992
2993 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2994                                         bool enable)
2995 {
2996         uint32_t data = 0;
2997         uint32_t default_data = 0;
2998
2999         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3000         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3001                              CP_PG_DISABLE,
3002                              enable ? 0 : 1);
3003         if (default_data != data)
3004                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3005 }
3006
3007 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
3008                                                 bool enable)
3009 {
3010         uint32_t data, default_data;
3011
3012         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3013         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3014                              GFX_POWER_GATING_ENABLE,
3015                              enable ? 1 : 0);
3016         if (default_data != data)
3017                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3018 }
3019
3020 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3021                                                 bool enable)
3022 {
3023         uint32_t data, default_data;
3024
3025         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3026         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3027                              GFX_PIPELINE_PG_ENABLE,
3028                              enable ? 1 : 0);
3029         if (default_data != data)
3030                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3031
3032         if (!enable)
3033                 /* read any GFX register to wake up GFX */
3034                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3035 }
3036
3037 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3038                                                        bool enable)
3039 {
3040         uint32_t data, default_data;
3041
3042         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3043         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3044                              STATIC_PER_CU_PG_ENABLE,
3045                              enable ? 1 : 0);
3046         if (default_data != data)
3047                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3048 }
3049
3050 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3051                                                 bool enable)
3052 {
3053         uint32_t data, default_data;
3054
3055         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3056         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3057                              DYN_PER_CU_PG_ENABLE,
3058                              enable ? 1 : 0);
3059         if (default_data != data)
3060                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3061 }
3062
3063 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3064 {
3065         gfx_v9_0_init_csb(adev);
3066
3067         /*
3068          * Rlc save restore list is workable since v2_1.
3069          * And it's needed by gfxoff feature.
3070          */
3071         if (adev->gfx.rlc.is_rlc_v2_1) {
3072                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3073                             IP_VERSION(9, 2, 1) ||
3074                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3075                         gfx_v9_1_init_rlc_save_restore_list(adev);
3076                 gfx_v9_0_enable_save_restore_machine(adev);
3077         }
3078
3079         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3080                               AMD_PG_SUPPORT_GFX_SMG |
3081                               AMD_PG_SUPPORT_GFX_DMG |
3082                               AMD_PG_SUPPORT_CP |
3083                               AMD_PG_SUPPORT_GDS |
3084                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3085                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3086                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3087                 gfx_v9_0_init_gfx_power_gating(adev);
3088         }
3089 }
3090
3091 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3092 {
3093         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3094         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3095         gfx_v9_0_wait_for_rlc_serdes(adev);
3096 }
3097
3098 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3099 {
3100         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3101         udelay(50);
3102         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3103         udelay(50);
3104 }
3105
3106 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3107 {
3108 #ifdef AMDGPU_RLC_DEBUG_RETRY
3109         u32 rlc_ucode_ver;
3110 #endif
3111
3112         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3113         udelay(50);
3114
3115         /* APUs (e.g. carrizo) enable the cp interrupt only after cp is initialized */
3116         if (!(adev->flags & AMD_IS_APU)) {
3117                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3118                 udelay(50);
3119         }
3120
3121 #ifdef AMDGPU_RLC_DEBUG_RETRY
3122         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3123         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3124         if (rlc_ucode_ver == 0x108) {
3125                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3126                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3127                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3128                  * default is 0x9C4 to create a 100us interval */
3129                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3130                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3131                  * to disable the page fault retry interrupts, default is
3132                  * 0x100 (256) */
3133                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3134         }
3135 #endif
3136 }
3137
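/*
 * Legacy (non-PSP) RLC microcode load: point RLC_GPM_UCODE_ADDR at the
 * start offset, stream the ucode dwords through RLC_GPM_UCODE_DATA, then
 * write the firmware version back to the address register.
 */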
3138 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3139 {
3140         const struct rlc_firmware_header_v2_0 *hdr;
3141         const __le32 *fw_data;
3142         unsigned i, fw_size;
3143
3144         if (!adev->gfx.rlc_fw)
3145                 return -EINVAL;
3146
3147         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3148         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3149
3150         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3151                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3152         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3153
3154         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3155                         RLCG_UCODE_LOADING_START_ADDRESS);
3156         for (i = 0; i < fw_size; i++)
3157                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3158         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3159
3160         return 0;
3161 }
3162
3163 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3164 {
3165         int r;
3166
3167         if (amdgpu_sriov_vf(adev)) {
3168                 gfx_v9_0_init_csb(adev);
3169                 return 0;
3170         }
3171
3172         adev->gfx.rlc.funcs->stop(adev);
3173
3174         /* disable CG */
3175         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3176
3177         gfx_v9_0_init_pg(adev);
3178
3179         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3180                 /* legacy rlc firmware loading */
3181                 r = gfx_v9_0_rlc_load_microcode(adev);
3182                 if (r)
3183                         return r;
3184         }
3185
3186         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3187         case IP_VERSION(9, 2, 2):
3188         case IP_VERSION(9, 1, 0):
3189                 gfx_v9_0_init_lbpw(adev);
3190                 if (amdgpu_lbpw == 0)
3191                         gfx_v9_0_enable_lbpw(adev, false);
3192                 else
3193                         gfx_v9_0_enable_lbpw(adev, true);
3194                 break;
3195         case IP_VERSION(9, 4, 0):
3196                 gfx_v9_4_init_lbpw(adev);
3197                 if (amdgpu_lbpw > 0)
3198                         gfx_v9_0_enable_lbpw(adev, true);
3199                 else
3200                         gfx_v9_0_enable_lbpw(adev, false);
3201                 break;
3202         default:
3203                 break;
3204         }
3205
3206         gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3207
3208         adev->gfx.rlc.funcs->start(adev);
3209
3210         return 0;
3211 }
3212
3213 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3214 {
3215         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3216
3217         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3218         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3219         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3220         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3221         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3222         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3223         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3224         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3225         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3226         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3227         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3228         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3229         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3230         udelay(50);
3231 }
3232
3233 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3234 {
3235         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3236         const struct gfx_firmware_header_v1_0 *ce_hdr;
3237         const struct gfx_firmware_header_v1_0 *me_hdr;
3238         const __le32 *fw_data;
3239         unsigned i, fw_size;
3240
3241         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3242                 return -EINVAL;
3243
3244         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3245                 adev->gfx.pfp_fw->data;
3246         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3247                 adev->gfx.ce_fw->data;
3248         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3249                 adev->gfx.me_fw->data;
3250
3251         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3252         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3253         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3254
3255         gfx_v9_0_cp_gfx_enable(adev, false);
3256
3257         /* PFP */
3258         fw_data = (const __le32 *)
3259                 (adev->gfx.pfp_fw->data +
3260                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3261         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3262         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3263         for (i = 0; i < fw_size; i++)
3264                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3265         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3266
3267         /* CE */
3268         fw_data = (const __le32 *)
3269                 (adev->gfx.ce_fw->data +
3270                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3271         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3272         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3273         for (i = 0; i < fw_size; i++)
3274                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3275         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3276
3277         /* ME */
3278         fw_data = (const __le32 *)
3279                 (adev->gfx.me_fw->data +
3280                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3281         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3282         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3283         for (i = 0; i < fw_size; i++)
3284                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3285         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3286
3287         return 0;
3288 }
3289
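/*
 * Prime the gfx CP: program the context/device limits, un-halt the
 * ME/PFP/CE, and (unless the APU S3 quirk applies) submit the clear-state
 * preamble on ring 0: PREAMBLE begin, CONTEXT_CONTROL, the SECT_CONTEXT
 * extents from gfx9_cs_data via SET_CONTEXT_REG, PREAMBLE end, CLEAR_STATE,
 * the CE partition bases and the VGT index type.
 */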
3290 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3291 {
3292         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3293         const struct cs_section_def *sect = NULL;
3294         const struct cs_extent_def *ext = NULL;
3295         int r, i, tmp;
3296
3297         /* init the CP */
3298         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3299         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3300
3301         gfx_v9_0_cp_gfx_enable(adev, true);
3302
3303         /* For now, only apply this quirk to the gfx9 APU series; the gfx10/gfx11
3304          * APUs are confirmed not to need this CSB resubmit.
3305          */
3306         if (adev->flags & AMD_IS_APU &&
3307                         adev->in_s3 && !pm_resume_via_firmware()) {
3308                 DRM_INFO("Will skip the CSB packet resubmit\n");
3309                 return 0;
3310         }
3311         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3312         if (r) {
3313                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3314                 return r;
3315         }
3316
3317         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3318         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3319
3320         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3321         amdgpu_ring_write(ring, 0x80000000);
3322         amdgpu_ring_write(ring, 0x80000000);
3323
3324         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3325                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3326                         if (sect->id == SECT_CONTEXT) {
3327                                 amdgpu_ring_write(ring,
3328                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3329                                                ext->reg_count));
3330                                 amdgpu_ring_write(ring,
3331                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3332                                 for (i = 0; i < ext->reg_count; i++)
3333                                         amdgpu_ring_write(ring, ext->extent[i]);
3334                         }
3335                 }
3336         }
3337
3338         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3339         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3340
3341         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3342         amdgpu_ring_write(ring, 0);
3343
3344         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3345         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3346         amdgpu_ring_write(ring, 0x8000);
3347         amdgpu_ring_write(ring, 0x8000);
3348
3349         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3350         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3351                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3352         amdgpu_ring_write(ring, tmp);
3353         amdgpu_ring_write(ring, 0);
3354
3355         amdgpu_ring_commit(ring);
3356
3357         return 0;
3358 }
3359
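/*
 * Restore the gfx ring-buffer state for ring 0: program the RB size and
 * block size, reset the write pointer, point the rptr/wptr writeback and
 * poll addresses at the ring's GPU addresses, set the RB base, configure
 * the doorbell offset and range, then kick gfx_v9_0_cp_gfx_start().
 */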
3360 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3361 {
3362         struct amdgpu_ring *ring;
3363         u32 tmp;
3364         u32 rb_bufsz;
3365         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3366
3367         /* Set the write pointer delay */
3368         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3369
3370         /* set the RB to use vmid 0 */
3371         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3372
3373         /* Set ring buffer size */
3374         ring = &adev->gfx.gfx_ring[0];
3375         rb_bufsz = order_base_2(ring->ring_size / 8);
3376         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3377         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3378 #ifdef __BIG_ENDIAN
3379         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3380 #endif
3381         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3382
3383         /* Initialize the ring buffer's write pointers */
3384         ring->wptr = 0;
3385         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3386         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3387
3388         /* set the wb address whether it's enabled or not */
3389         rptr_addr = ring->rptr_gpu_addr;
3390         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3391         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3392
3393         wptr_gpu_addr = ring->wptr_gpu_addr;
3394         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3395         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3396
3397         mdelay(1);
3398         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3399
3400         rb_addr = ring->gpu_addr >> 8;
3401         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3402         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3403
3404         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3405         if (ring->use_doorbell) {
3406                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3407                                     DOORBELL_OFFSET, ring->doorbell_index);
3408                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3409                                     DOORBELL_EN, 1);
3410         } else {
3411                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3412         }
3413         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3414
3415         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3416                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3417         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3418
3419         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3420                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3421
3422
3423         /* start the ring */
3424         gfx_v9_0_cp_gfx_start(adev);
3425
3426         return 0;
3427 }
3428
3429 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3430 {
3431         if (enable) {
3432                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3433         } else {
3434                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3435                                  (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3436                                   CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3437                                   CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3438                                   CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3439                                   CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3440                                   CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3441                                   CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3442                                   CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3443                                   CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3444                 adev->gfx.kiq[0].ring.sched.ready = false;
3445         }
3446         udelay(50);
3447 }
3448
3449 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3450 {
3451         const struct gfx_firmware_header_v1_0 *mec_hdr;
3452         const __le32 *fw_data;
3453         unsigned i;
3454         u32 tmp;
3455
3456         if (!adev->gfx.mec_fw)
3457                 return -EINVAL;
3458
3459         gfx_v9_0_cp_compute_enable(adev, false);
3460
3461         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3462         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3463
3464         fw_data = (const __le32 *)
3465                 (adev->gfx.mec_fw->data +
3466                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3467         tmp = 0;
3468         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3469         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3470         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3471
3472         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3473                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3474         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3475                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3476
3477         /* MEC1 */
3478         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3479                          mec_hdr->jt_offset);
3480         for (i = 0; i < mec_hdr->jt_size; i++)
3481                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3482                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3483
3484         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3485                         adev->gfx.mec_fw_version);
3486         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3487
3488         return 0;
3489 }
3490
3491 /* KIQ functions */
3492 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3493 {
3494         uint32_t tmp;
3495         struct amdgpu_device *adev = ring->adev;
3496
3497         /* tell the RLC which queue is the KIQ queue */
3498         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3499         tmp &= 0xffffff00;
3500         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3501         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
3502 }
3503
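/* Raise the pipe/queue priority in the MQD for compute rings that were
 * created as high-priority queues.
 */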
3504 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3505 {
3506         struct amdgpu_device *adev = ring->adev;
3507
3508         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3509                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3510                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3511                         mqd->cp_hqd_queue_priority =
3512                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3513                 }
3514         }
3515 }
3516
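/* Fill the memory queue descriptor (MQD) for a compute or KIQ ring: EOP
 * buffer, doorbell, ring buffer base and size, rptr/wptr write-back
 * addresses and queue priority.
 */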
3517 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3518 {
3519         struct amdgpu_device *adev = ring->adev;
3520         struct v9_mqd *mqd = ring->mqd_ptr;
3521         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3522         uint32_t tmp;
3523
3524         mqd->header = 0xC0310800;
3525         mqd->compute_pipelinestat_enable = 0x00000001;
3526         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3527         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3528         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3529         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3530         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3531         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3532         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3533         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3534         mqd->compute_misc_reserved = 0x00000003;
3535
3536         mqd->dynamic_cu_mask_addr_lo =
3537                 lower_32_bits(ring->mqd_gpu_addr
3538                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3539         mqd->dynamic_cu_mask_addr_hi =
3540                 upper_32_bits(ring->mqd_gpu_addr
3541                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3542
3543         eop_base_addr = ring->eop_gpu_addr >> 8;
3544         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3545         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3546
3547         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3548         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3549         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3550                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3551
3552         mqd->cp_hqd_eop_control = tmp;
3553
3554         /* enable doorbell? */
3555         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3556
3557         if (ring->use_doorbell) {
3558                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3559                                     DOORBELL_OFFSET, ring->doorbell_index);
3560                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3561                                     DOORBELL_EN, 1);
3562                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3563                                     DOORBELL_SOURCE, 0);
3564                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3565                                     DOORBELL_HIT, 0);
3566         } else {
3567                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3568                                          DOORBELL_EN, 0);
3569         }
3570
3571         mqd->cp_hqd_pq_doorbell_control = tmp;
3572
3573         /* disable the queue if it's active */
3574         ring->wptr = 0;
3575         mqd->cp_hqd_dequeue_request = 0;
3576         mqd->cp_hqd_pq_rptr = 0;
3577         mqd->cp_hqd_pq_wptr_lo = 0;
3578         mqd->cp_hqd_pq_wptr_hi = 0;
3579
3580         /* set the pointer to the MQD */
3581         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3582         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3583
3584         /* set MQD vmid to 0 */
3585         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3586         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3587         mqd->cp_mqd_control = tmp;
3588
3589         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3590         hqd_gpu_addr = ring->gpu_addr >> 8;
3591         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3592         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3593
3594         /* set up the HQD, this is similar to CP_RB0_CNTL */
3595         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3596         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3597                             (order_base_2(ring->ring_size / 4) - 1));
3598         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3599                         (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3600 #ifdef __BIG_ENDIAN
3601         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3602 #endif
3603         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3604         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3605         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3606         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3607         mqd->cp_hqd_pq_control = tmp;
3608
3609         /* set the wb address whether it's enabled or not */
3610         wb_gpu_addr = ring->rptr_gpu_addr;
3611         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3612         mqd->cp_hqd_pq_rptr_report_addr_hi =
3613                 upper_32_bits(wb_gpu_addr) & 0xffff;
3614
3615         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3616         wb_gpu_addr = ring->wptr_gpu_addr;
3617         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3618         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3619
3620         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3621         ring->wptr = 0;
3622         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3623
3624         /* set the vmid for the queue */
3625         mqd->cp_hqd_vmid = 0;
3626
3627         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3628         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3629         mqd->cp_hqd_persistent_state = tmp;
3630
3631         /* set MIN_IB_AVAIL_SIZE */
3632         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3633         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3634         mqd->cp_hqd_ib_control = tmp;
3635
3636         /* set static priority for a queue/ring */
3637         gfx_v9_0_mqd_set_priority(ring, mqd);
3638         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3639
3640         /* The map_queues packet doesn't need to activate the queue,
3641          * so only the KIQ needs to set this field.
3642          */
3643         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3644                 mqd->cp_hqd_active = 1;
3645
3646         return 0;
3647 }
3648
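/* Program the HQD registers for the KIQ ring from its MQD. Callers select
 * the target me/pipe/queue via soc15_grbm_select() with srbm_mutex held.
 */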
3649 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3650 {
3651         struct amdgpu_device *adev = ring->adev;
3652         struct v9_mqd *mqd = ring->mqd_ptr;
3653         int j;
3654
3655         /* disable wptr polling */
3656         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3657
3658         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3659                mqd->cp_hqd_eop_base_addr_lo);
3660         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3661                mqd->cp_hqd_eop_base_addr_hi);
3662
3663         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3664         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3665                mqd->cp_hqd_eop_control);
3666
3667         /* enable doorbell? */
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3669                mqd->cp_hqd_pq_doorbell_control);
3670
3671         /* disable the queue if it's active */
3672         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3673                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3674                 for (j = 0; j < adev->usec_timeout; j++) {
3675                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3676                                 break;
3677                         udelay(1);
3678                 }
3679                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3680                        mqd->cp_hqd_dequeue_request);
3681                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3682                        mqd->cp_hqd_pq_rptr);
3683                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3684                        mqd->cp_hqd_pq_wptr_lo);
3685                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3686                        mqd->cp_hqd_pq_wptr_hi);
3687         }
3688
3689         /* set the pointer to the MQD */
3690         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3691                mqd->cp_mqd_base_addr_lo);
3692         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3693                mqd->cp_mqd_base_addr_hi);
3694
3695         /* set MQD vmid to 0 */
3696         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3697                mqd->cp_mqd_control);
3698
3699         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3700         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3701                mqd->cp_hqd_pq_base_lo);
3702         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3703                mqd->cp_hqd_pq_base_hi);
3704
3705         /* set up the HQD, this is similar to CP_RB0_CNTL */
3706         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3707                mqd->cp_hqd_pq_control);
3708
3709         /* set the wb address whether it's enabled or not */
3710         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3711                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3712         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3713                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3714
3715         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3716         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3717                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3718         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3719                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3720
3721         /* enable the doorbell if requested */
3722         if (ring->use_doorbell) {
3723                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3724                                         (adev->doorbell_index.kiq * 2) << 2);
3725                 /* If the GC has entered CGPG, ringing a doorbell beyond the first
3726                  * page doesn't wake it up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3727                  * work around this issue. This change has to be aligned with the
3728                  * firmware update.
3729                  */
3730                 if (check_if_enlarge_doorbell_range(adev))
3731                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3732                                         (adev->doorbell.size - 4));
3733                 else
3734                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3735                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3736         }
3737
3738         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3739                mqd->cp_hqd_pq_doorbell_control);
3740
3741         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3742         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3743                mqd->cp_hqd_pq_wptr_lo);
3744         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3745                mqd->cp_hqd_pq_wptr_hi);
3746
3747         /* set the vmid for the queue */
3748         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3749
3750         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3751                mqd->cp_hqd_persistent_state);
3752
3753         /* activate the queue */
3754         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3755                mqd->cp_hqd_active);
3756
3757         if (ring->use_doorbell)
3758                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3759
3760         return 0;
3761 }
3762
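/* Tear down the KIQ HQD: request a dequeue, wait for the queue to become
 * inactive (forcing CP_HQD_ACTIVE to 0 on timeout) and clear the remaining
 * HQD registers.
 */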
3763 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3764 {
3765         struct amdgpu_device *adev = ring->adev;
3766         int j;
3767
3768         /* disable the queue if it's active */
3769         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3770
3771                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3772
3773                 for (j = 0; j < adev->usec_timeout; j++) {
3774                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3775                                 break;
3776                         udelay(1);
3777                 }
3778
3779                 if (j == adev->usec_timeout) {
3780                         DRM_DEBUG("KIQ dequeue request failed.\n");
3781
3782                         /* Manual disable if dequeue request times out */
3783                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3784                 }
3785
3786                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3787                       0);
3788         }
3789
3790         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3791         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3792         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3793         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3794         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3795         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3796         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3797         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3798
3799         return 0;
3800 }
3801
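/* Initialize the KIQ queue, restoring the MQD from the backup copy when
 * coming back from a GPU reset.
 */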
3802 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3803 {
3804         struct amdgpu_device *adev = ring->adev;
3805         struct v9_mqd *mqd = ring->mqd_ptr;
3806         struct v9_mqd *tmp_mqd;
3807
3808         gfx_v9_0_kiq_setting(ring);
3809
3810         /* The GPU may be in a bad state during probe and the driver may trigger
3811          * a reset after loading the SMU; in that case the MQD has not been
3812          * initialized and the driver needs to re-init it.
3813          * Check mqd->cp_hqd_pq_control since this value should not be 0.
3814          */
3815         tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3816         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3817                 /* for the GPU_RESET case, reset the MQD to a clean status */
3818                 if (adev->gfx.kiq[0].mqd_backup)
3819                         memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3820
3821                 /* reset ring buffer */
3822                 ring->wptr = 0;
3823                 amdgpu_ring_clear_ring(ring);
3824
3825                 mutex_lock(&adev->srbm_mutex);
3826                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3827                 gfx_v9_0_kiq_init_register(ring);
3828                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3829                 mutex_unlock(&adev->srbm_mutex);
3830         } else {
3831                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3832                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3833                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3834                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3835                         amdgpu_ring_clear_ring(ring);
3836                 mutex_lock(&adev->srbm_mutex);
3837                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3838                 gfx_v9_0_mqd_init(ring);
3839                 gfx_v9_0_kiq_init_register(ring);
3840                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3841                 mutex_unlock(&adev->srbm_mutex);
3842
3843                 if (adev->gfx.kiq[0].mqd_backup)
3844                         memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3845         }
3846
3847         return 0;
3848 }
3849
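/* Initialize a kernel compute queue MQD, or restore it from the backup
 * copy on reset/resume when it was already initialized.
 */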
3850 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3851 {
3852         struct amdgpu_device *adev = ring->adev;
3853         struct v9_mqd *mqd = ring->mqd_ptr;
3854         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3855         struct v9_mqd *tmp_mqd;
3856
3857         /* Same as the KIQ init above: the driver needs to re-init the MQD if
3858          * mqd->cp_hqd_pq_control has not been initialized before.
3859          */
3860         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3861
3862         if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3863             (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3864                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3865                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3866                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3867                 mutex_lock(&adev->srbm_mutex);
3868                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3869                 gfx_v9_0_mqd_init(ring);
3870                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3871                 mutex_unlock(&adev->srbm_mutex);
3872
3873                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3874                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3875         } else {
3876                 /* restore MQD to a clean status */
3877                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3878                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3879                 /* reset ring buffer */
3880                 ring->wptr = 0;
3881                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3882                 amdgpu_ring_clear_ring(ring);
3883         }
3884
3885         return 0;
3886 }
3887
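/* Map the KIQ MQD buffer object and (re)initialize the KIQ queue. */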
3888 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3889 {
3890         struct amdgpu_ring *ring;
3891         int r;
3892
3893         ring = &adev->gfx.kiq[0].ring;
3894
3895         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3896         if (unlikely(r != 0))
3897                 return r;
3898
3899         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3900         if (unlikely(r != 0)) {
3901                 amdgpu_bo_unreserve(ring->mqd_obj);
3902                 return r;
3903         }
3904
3905         gfx_v9_0_kiq_init_queue(ring);
3906         amdgpu_bo_kunmap(ring->mqd_obj);
3907         ring->mqd_ptr = NULL;
3908         amdgpu_bo_unreserve(ring->mqd_obj);
3909         return 0;
3910 }
3911
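/* Enable the compute engine, initialize the MQD of every kernel compute
 * ring and map the queues through the KIQ.
 */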
3912 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3913 {
3914         struct amdgpu_ring *ring = NULL;
3915         int r = 0, i;
3916
3917         gfx_v9_0_cp_compute_enable(adev, true);
3918
3919         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3920                 ring = &adev->gfx.compute_ring[i];
3921
3922                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3923                 if (unlikely(r != 0))
3924                         goto done;
3925                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3926                 if (!r) {
3927                         r = gfx_v9_0_kcq_init_queue(ring, false);
3928                         amdgpu_bo_kunmap(ring->mqd_obj);
3929                         ring->mqd_ptr = NULL;
3930                 }
3931                 amdgpu_bo_unreserve(ring->mqd_obj);
3932                 if (r)
3933                         goto done;
3934         }
3935
3936         r = amdgpu_gfx_enable_kcq(adev, 0);
3937 done:
3938         return r;
3939 }
3940
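/* Bring up the command processor: load microcode when PSP loading is not
 * used, resume the KIQ, the gfx ring and the kernel compute queues, then
 * run a ring test on each ring.
 */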
3941 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3942 {
3943         int r, i;
3944         struct amdgpu_ring *ring;
3945
3946         if (!(adev->flags & AMD_IS_APU))
3947                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3948
3949         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3950                 if (adev->gfx.num_gfx_rings) {
3951                         /* legacy firmware loading */
3952                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3953                         if (r)
3954                                 return r;
3955                 }
3956
3957                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3958                 if (r)
3959                         return r;
3960         }
3961
3962         if (adev->gfx.num_gfx_rings)
3963                 gfx_v9_0_cp_gfx_enable(adev, false);
3964         gfx_v9_0_cp_compute_enable(adev, false);
3965
3966         r = gfx_v9_0_kiq_resume(adev);
3967         if (r)
3968                 return r;
3969
3970         if (adev->gfx.num_gfx_rings) {
3971                 r = gfx_v9_0_cp_gfx_resume(adev);
3972                 if (r)
3973                         return r;
3974         }
3975
3976         r = gfx_v9_0_kcq_resume(adev);
3977         if (r)
3978                 return r;
3979
3980         if (adev->gfx.num_gfx_rings) {
3981                 ring = &adev->gfx.gfx_ring[0];
3982                 r = amdgpu_ring_test_helper(ring);
3983                 if (r)
3984                         return r;
3985         }
3986
3987         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3988                 ring = &adev->gfx.compute_ring[i];
3989                 amdgpu_ring_test_helper(ring);
3990         }
3991
3992         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3993
3994         return 0;
3995 }
3996
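/* Mirror the DF hash settings into TCP_ADDR_CONFIG on gc 9.4.1/9.4.2. */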
3997 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3998 {
3999         u32 tmp;
4000
4001         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
4002             amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
4003                 return;
4004
4005         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
4006         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
4007                                 adev->df.hash_status.hash_64k);
4008         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4009                                 adev->df.hash_status.hash_2m);
4010         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4011                                 adev->df.hash_status.hash_1g);
4012         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4013 }
4014
4015 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4016 {
4017         if (adev->gfx.num_gfx_rings)
4018                 gfx_v9_0_cp_gfx_enable(adev, enable);
4019         gfx_v9_0_cp_compute_enable(adev, enable);
4020 }
4021
4022 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4023 {
4024         int r;
4025         struct amdgpu_device *adev = ip_block->adev;
4026
4027         amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4028                                        adev->gfx.cleaner_shader_ptr);
4029
4030         if (!amdgpu_sriov_vf(adev))
4031                 gfx_v9_0_init_golden_registers(adev);
4032
4033         gfx_v9_0_constants_init(adev);
4034
4035         gfx_v9_0_init_tcp_config(adev);
4036
4037         r = adev->gfx.rlc.funcs->resume(adev);
4038         if (r)
4039                 return r;
4040
4041         r = gfx_v9_0_cp_resume(adev);
4042         if (r)
4043                 return r;
4044
4045         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4046                 gfx_v9_4_2_set_power_brake_sequence(adev);
4047
4048         return r;
4049 }
4050
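/* Tear down the gfx block: drop the gfx interrupts, unmap the kernel
 * compute queues, deactivate the KIQ when fully unbinding, disable the CP
 * and finally halt the RLC unless it has to keep running.
 */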
4051 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4052 {
4053         struct amdgpu_device *adev = ip_block->adev;
4054
4055         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4056                 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4057         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4058         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4059         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4060
4061         /* if a RAS fatal interrupt has been triggered, DF freeze and KCQ disable would fail */
4062         if (!amdgpu_ras_intr_triggered())
4063                 /* disable the KCQ to keep the CPC from touching memory that is no longer valid */
4064                 amdgpu_gfx_disable_kcq(adev, 0);
4065
4066         if (amdgpu_sriov_vf(adev)) {
4067                 gfx_v9_0_cp_gfx_enable(adev, false);
4068                 /* Polling must be disabled for SRIOV when the hw is finished;
4069                  * otherwise the CPC engine may keep fetching a WB address that is
4070                  * no longer valid after the sw is finished and trigger a DMAR read
4071                  * error on the hypervisor side.
4072                  */
4073                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4074                 return 0;
4075         }
4076
4077         /* Use the deinitialize sequence from CAIL when unbinding the device
4078          * from the driver, otherwise the KIQ hangs when binding it back.
4079          */
4080         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4081                 mutex_lock(&adev->srbm_mutex);
4082                 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4083                                 adev->gfx.kiq[0].ring.pipe,
4084                                 adev->gfx.kiq[0].ring.queue, 0, 0);
4085                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4086                 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4087                 mutex_unlock(&adev->srbm_mutex);
4088         }
4089
4090         gfx_v9_0_cp_enable(adev, false);
4091
4092         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4093         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4094             (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4095                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4096                 return 0;
4097         }
4098
4099         adev->gfx.rlc.funcs->stop(adev);
4100         return 0;
4101 }
4102
4103 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4104 {
4105         return gfx_v9_0_hw_fini(ip_block);
4106 }
4107
4108 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4109 {
4110         return gfx_v9_0_hw_init(ip_block);
4111 }
4112
4113 static bool gfx_v9_0_is_idle(void *handle)
4114 {
4115         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4116
4117         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4118                                 GRBM_STATUS, GUI_ACTIVE))
4119                 return false;
4120         else
4121                 return true;
4122 }
4123
4124 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4125 {
4126         unsigned i;
4127         struct amdgpu_device *adev = ip_block->adev;
4128
4129         for (i = 0; i < adev->usec_timeout; i++) {
4130                 if (gfx_v9_0_is_idle(adev))
4131                         return 0;
4132                 udelay(1);
4133         }
4134         return -ETIMEDOUT;
4135 }
4136
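/* Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if any are busy,
 * halt the RLC and CP and pulse the corresponding GRBM soft-reset bits.
 */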
4137 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4138 {
4139         u32 grbm_soft_reset = 0;
4140         u32 tmp;
4141         struct amdgpu_device *adev = ip_block->adev;
4142
4143         /* GRBM_STATUS */
4144         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4145         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4146                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4147                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4148                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4149                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4150                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4151                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4152                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4153                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4154                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4155         }
4156
4157         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4158                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4159                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4160         }
4161
4162         /* GRBM_STATUS2 */
4163         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4164         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4165                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4166                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4167
4168
4169         if (grbm_soft_reset) {
4170                 /* stop the rlc */
4171                 adev->gfx.rlc.funcs->stop(adev);
4172
4173                 if (adev->gfx.num_gfx_rings)
4174                         /* Disable GFX parsing/prefetching */
4175                         gfx_v9_0_cp_gfx_enable(adev, false);
4176
4177                 /* Disable MEC parsing/prefetching */
4178                 gfx_v9_0_cp_compute_enable(adev, false);
4179
4180                 if (grbm_soft_reset) {
4181                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4182                         tmp |= grbm_soft_reset;
4183                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4184                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4185                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4186
4187                         udelay(50);
4188
4189                         tmp &= ~grbm_soft_reset;
4190                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4191                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4192                 }
4193
4194                 /* Wait a little for things to settle down */
4195                 udelay(50);
4196         }
4197         return 0;
4198 }
4199
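/* Read the GPU clock counter through a KIQ COPY_DATA packet into a
 * write-back slot; used when the register cannot be read directly.
 */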
4200 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4201 {
4202         signed long r, cnt = 0;
4203         unsigned long flags;
4204         uint32_t seq, reg_val_offs = 0;
4205         uint64_t value = 0;
4206         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4207         struct amdgpu_ring *ring = &kiq->ring;
4208
4209         BUG_ON(!ring->funcs->emit_rreg);
4210
4211         spin_lock_irqsave(&kiq->ring_lock, flags);
4212         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4213                 pr_err("critical bug! too many kiq readers\n");
4214                 goto failed_unlock;
4215         }
4216         amdgpu_ring_alloc(ring, 32);
4217         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4218         amdgpu_ring_write(ring, 9 |     /* src: register*/
4219                                 (5 << 8) |      /* dst: memory */
4220                                 (1 << 16) |     /* count sel */
4221                                 (1 << 20));     /* write confirm */
4222         amdgpu_ring_write(ring, 0);
4223         amdgpu_ring_write(ring, 0);
4224         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4225                                 reg_val_offs * 4));
4226         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4227                                 reg_val_offs * 4));
4228         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4229         if (r)
4230                 goto failed_undo;
4231
4232         amdgpu_ring_commit(ring);
4233         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4234
4235         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4236
4237         /* Don't wait any longer in the GPU reset case because that may block
4238          * the gpu_recover() routine forever; e.g. this virt_kiq_rreg is
4239          * triggered from TTM and ttm_bo_lock_delayed_workqueue() will never
4240          * return if we keep waiting in virt_kiq_rreg, which causes
4241          * gpu_recover() to hang there.
4242          *
4243          * Also don't wait any longer when in IRQ context.
4244          */
4245         if (r < 1 && (amdgpu_in_reset(adev)))
4246                 goto failed_kiq_read;
4247
4248         might_sleep();
4249         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4250                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4251                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4252         }
4253
4254         if (cnt > MAX_KIQ_REG_TRY)
4255                 goto failed_kiq_read;
4256
4257         mb();
4258         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4259                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4260         amdgpu_device_wb_free(adev, reg_val_offs);
4261         return value;
4262
4263 failed_undo:
4264         amdgpu_ring_undo(ring);
4265 failed_unlock:
4266         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4267 failed_kiq_read:
4268         if (reg_val_offs)
4269                 amdgpu_device_wb_free(adev, reg_val_offs);
4270         pr_err("failed to read gpu clock\n");
4271         return ~0;
4272 }
4273
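/* Sample the 64-bit GPU clock counter. Renoir (gc 9.3.0) reads the SMUIO
 * golden TSC with a carry-over check; other ASICs capture the RLC clock
 * counter, going through the KIQ for SRIOV runtime on gc 9.0.1.
 */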
4274 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4275 {
4276         uint64_t clock, clock_lo, clock_hi, hi_check;
4277
4278         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4279         case IP_VERSION(9, 3, 0):
4280                 preempt_disable();
4281                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4282                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4283                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4284                 /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
4285                  * carries over roughly every 42 seconds.
4286                  */
4287                 if (hi_check != clock_hi) {
4288                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4289                         clock_hi = hi_check;
4290                 }
4291                 preempt_enable();
4292                 clock = clock_lo | (clock_hi << 32ULL);
4293                 break;
4294         default:
4295                 amdgpu_gfx_off_ctrl(adev, false);
4296                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4297                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4298                             IP_VERSION(9, 0, 1) &&
4299                     amdgpu_sriov_runtime(adev)) {
4300                         clock = gfx_v9_0_kiq_read_clock(adev);
4301                 } else {
4302                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4303                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4304                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4305                 }
4306                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4307                 amdgpu_gfx_off_ctrl(adev, true);
4308                 break;
4309         }
4310         return clock;
4311 }
4312
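/* Emit packets that program the per-VMID GDS base/size, GWS and OA
 * allocations for this ring.
 */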
4313 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4314                                           uint32_t vmid,
4315                                           uint32_t gds_base, uint32_t gds_size,
4316                                           uint32_t gws_base, uint32_t gws_size,
4317                                           uint32_t oa_base, uint32_t oa_size)
4318 {
4319         struct amdgpu_device *adev = ring->adev;
4320
4321         /* GDS Base */
4322         gfx_v9_0_write_data_to_reg(ring, 0, false,
4323                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4324                                    gds_base);
4325
4326         /* GDS Size */
4327         gfx_v9_0_write_data_to_reg(ring, 0, false,
4328                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4329                                    gds_size);
4330
4331         /* GWS */
4332         gfx_v9_0_write_data_to_reg(ring, 0, false,
4333                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4334                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4335
4336         /* OA */
4337         gfx_v9_0_write_data_to_reg(ring, 0, false,
4338                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4339                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4340 }
4341
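/* Pre-assembled GFX9 compute shader binaries used by
 * gfx_v9_0_do_edc_gpr_workarounds() below to initialize the VGPRs and SGPRs.
 */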
4342 static const u32 vgpr_init_compute_shader[] =
4343 {
4344         0xb07c0000, 0xbe8000ff,
4345         0x000000f8, 0xbf110800,
4346         0x7e000280, 0x7e020280,
4347         0x7e040280, 0x7e060280,
4348         0x7e080280, 0x7e0a0280,
4349         0x7e0c0280, 0x7e0e0280,
4350         0x80808800, 0xbe803200,
4351         0xbf84fff5, 0xbf9c0000,
4352         0xd28c0001, 0x0001007f,
4353         0xd28d0001, 0x0002027e,
4354         0x10020288, 0xb8810904,
4355         0xb7814000, 0xd1196a01,
4356         0x00000301, 0xbe800087,
4357         0xbefc00c1, 0xd89c4000,
4358         0x00020201, 0xd89cc080,
4359         0x00040401, 0x320202ff,
4360         0x00000800, 0x80808100,
4361         0xbf84fff8, 0x7e020280,
4362         0xbf810000, 0x00000000,
4363 };
4364
4365 static const u32 sgpr_init_compute_shader[] =
4366 {
4367         0xb07c0000, 0xbe8000ff,
4368         0x0000005f, 0xbee50080,
4369         0xbe812c65, 0xbe822c65,
4370         0xbe832c65, 0xbe842c65,
4371         0xbe852c65, 0xb77c0005,
4372         0x80808500, 0xbf84fff8,
4373         0xbe800080, 0xbf810000,
4374 };
4375
4376 static const u32 vgpr_init_compute_shader_arcturus[] = {
4377         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4378         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4379         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4380         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4381         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4382         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4383         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4384         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4385         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4386         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4387         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4388         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4389         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4390         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4391         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4392         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4393         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4394         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4395         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4396         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4397         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4398         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4399         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4400         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4401         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4402         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4403         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4404         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4405         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4406         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4407         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4408         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4409         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4410         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4411         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4412         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4413         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4414         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4415         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4416         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4417         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4418         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4419         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4420         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4421         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4422         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4423         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4424         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4425         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4426         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4427         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4428         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4429         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4430         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4431         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4432         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4433         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4434         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4435         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4436         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4437         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4438         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4439         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4440         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4441         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4442         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4443         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4444         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4445         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4446         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4447         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4448         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4449         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4450         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4451         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4452         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4453         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4454         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4455         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4456         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4457         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4458         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4459         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4460         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4461         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4462         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4463         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4464         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4465         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4466         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4467         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4468         0xbf84fff8, 0xbf810000,
4469 };
4470
4471 /* When the register arrays below are changed, please update gpr_reg_size
4472  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4473  * to cover all gfx9 ASICs */
4474 static const struct soc15_reg_entry vgpr_init_regs[] = {
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4489 };
4490
4491 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4492    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4493    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4494    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4500    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4506 };
4507
4508 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4509    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4510    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4511    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4515    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4516    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4517    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4518    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4519    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4520    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4523 };
4524
4525 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4526    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4527    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4528    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4529    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4530    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4531    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4532    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4533    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4534    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4535    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4536    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4537    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4538    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4539    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4540 };
4541
4542 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4543    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4544    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4545    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4546    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4547    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4548    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4549    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4550    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4551    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4552    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4553    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4554    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4555    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4556    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4557    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4558    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4559    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4560    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4561    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4562    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4563    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4564    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4565    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4566    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4567    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4568    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4569    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4570    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4571    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4572    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4573    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4574    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4575    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4576 };
4577
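/* EDC GDS workaround: emit a DMA_DATA packet that writes zeroes across the
 * whole GDS. Only runs when GFX RAS is supported.
 */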
4578 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4579 {
4580         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4581         int i, r;
4582
4583         /* only support when RAS is enabled */
4584         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4585                 return 0;
4586
4587         r = amdgpu_ring_alloc(ring, 7);
4588         if (r) {
4589                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4590                         ring->name, r);
4591                 return r;
4592         }
4593
4594         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4595         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4596
4597         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4598         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4599                                 PACKET3_DMA_DATA_DST_SEL(1) |
4600                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4601                                 PACKET3_DMA_DATA_ENGINE(0)));
4602         amdgpu_ring_write(ring, 0);
4603         amdgpu_ring_write(ring, 0);
4604         amdgpu_ring_write(ring, 0);
4605         amdgpu_ring_write(ring, 0);
4606         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4607                                 adev->gds.gds_size);
4608
4609         amdgpu_ring_commit(ring);
4610
4611         for (i = 0; i < adev->usec_timeout; i++) {
4612                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4613                         break;
4614                 udelay(1);
4615         }
4616
4617         if (i >= adev->usec_timeout)
4618                 r = -ETIMEDOUT;
4619
4620         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4621
4622         return r;
4623 }
4624
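/* EDC GPR workaround: build an IB that loads the VGPR and SGPR init
 * shaders above and programs the compute dispatch state so the GPRs are
 * written once. Only runs when GFX RAS is supported and the first compute
 * ring is ready.
 */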
4625 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4626 {
4627         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4628         struct amdgpu_ib ib;
4629         struct dma_fence *f = NULL;
4630         int r, i;
4631         unsigned total_size, vgpr_offset, sgpr_offset;
4632         u64 gpu_addr;
4633
4634         int compute_dim_x = adev->gfx.config.max_shader_engines *
4635                                                 adev->gfx.config.max_cu_per_sh *
4636                                                 adev->gfx.config.max_sh_per_se;
4637         int sgpr_work_group_size = 5;
4638         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4639         int vgpr_init_shader_size;
4640         const u32 *vgpr_init_shader_ptr;
4641         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4642
4643         /* only support when RAS is enabled */
4644         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4645                 return 0;
4646
4647         /* bail if the compute ring is not ready */
4648         if (!ring->sched.ready)
4649                 return 0;
4650
4651         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4652                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4653                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4654                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4655         } else {
4656                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4657                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4658                 vgpr_init_regs_ptr = vgpr_init_regs;
4659         }
4660
4661         total_size =
4662                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4663         total_size +=
4664                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4665         total_size +=
4666                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4667         total_size = ALIGN(total_size, 256);
4668         vgpr_offset = total_size;
4669         total_size += ALIGN(vgpr_init_shader_size, 256);
4670         sgpr_offset = total_size;
4671         total_size += sizeof(sgpr_init_compute_shader);
4672
4673         /* allocate an indirect buffer to put the commands in */
4674         memset(&ib, 0, sizeof(ib));
4675         r = amdgpu_ib_get(adev, NULL, total_size,
4676                                         AMDGPU_IB_POOL_DIRECT, &ib);
4677         if (r) {
4678                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4679                 return r;
4680         }
4681
4682         /* load the compute shaders */
4683         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4684                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4685
4686         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4687                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4688
4689         /* init the ib length to 0 */
4690         ib.length_dw = 0;
4691
4692         /* VGPR */
4693         /* write the register state for the compute dispatch */
4694         for (i = 0; i < gpr_reg_size; i++) {
4695                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4696                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4697                                                                 - PACKET3_SET_SH_REG_START;
4698                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4699         }
4700         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4701         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4702         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4703         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4704                                                         - PACKET3_SET_SH_REG_START;
4705         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4706         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4707
4708         /* write dispatch packet */
4709         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4710         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4711         ib.ptr[ib.length_dw++] = 1; /* y */
4712         ib.ptr[ib.length_dw++] = 1; /* z */
4713         ib.ptr[ib.length_dw++] =
4714                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4715
4716         /* write CS partial flush packet */
4717         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4718         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4719
4720         /* SGPR1 */
4721         /* write the register state for the compute dispatch */
4722         for (i = 0; i < gpr_reg_size; i++) {
4723                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4724                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4725                                                                 - PACKET3_SET_SH_REG_START;
4726                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4727         }
4728         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4729         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4730         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4731         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4732                                                         - PACKET3_SET_SH_REG_START;
4733         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4734         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4735
4736         /* write dispatch packet */
4737         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4738         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4739         ib.ptr[ib.length_dw++] = 1; /* y */
4740         ib.ptr[ib.length_dw++] = 1; /* z */
4741         ib.ptr[ib.length_dw++] =
4742                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4743
4744         /* write CS partial flush packet */
4745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4746         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4747
4748         /* SGPR2 */
4749         /* write the register state for the compute dispatch */
4750         for (i = 0; i < gpr_reg_size; i++) {
4751                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4752                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4753                                                                 - PACKET3_SET_SH_REG_START;
4754                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4755         }
4756         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4757         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4758         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4759         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4760                                                         - PACKET3_SET_SH_REG_START;
4761         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4762         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4763
4764         /* write dispatch packet */
4765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4766         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4767         ib.ptr[ib.length_dw++] = 1; /* y */
4768         ib.ptr[ib.length_dw++] = 1; /* z */
4769         ib.ptr[ib.length_dw++] =
4770                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4771
4772         /* write CS partial flush packet */
4773         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4774         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4775
4776         /* schedule the ib on the ring */
4777         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4778         if (r) {
4779                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4780                 goto fail;
4781         }
4782
4783         /* wait for the GPU to finish processing the IB */
4784         r = dma_fence_wait(f, false);
4785         if (r) {
4786                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4787                 goto fail;
4788         }
4789
4790 fail:
4791         amdgpu_ib_free(&ib, NULL);
4792         dma_fence_put(f);
4793
4794         return r;
4795 }
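/*
 * Illustrative note on the IB built above (a sketch derived from this
 * function, not from hardware documentation): the workaround packs three
 * compute dispatches (VGPR, SGPR1, SGPR2) into a single indirect buffer,
 * and each dispatch contributes
 *
 *   gpr_reg_size * 3 dwords - one SET_SH_REG (header, offset, value) per
 *                             entry in the corresponding init register table
 *   4 dwords                - SET_SH_REG for COMPUTE_PGM_LO/HI
 *   5 dwords                - DISPATCH_DIRECT (header, x, y, z, initiator)
 *   2 dwords                - EVENT_WRITE CS partial flush
 *
 * which is where the (gpr_reg_size * 3 + 4 + 5 + 2) * 4 byte budget for
 * total_size comes from.  The shader binaries live after the packets at
 * vgpr_offset and sgpr_offset, both 256-byte aligned so COMPUTE_PGM_LO/HI
 * can address them with the >> 8 shift used above.
 */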
4796
4797 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4798 {
4799         struct amdgpu_device *adev = ip_block->adev;
4800
4801         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4802
4803         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4804             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4805                 adev->gfx.num_gfx_rings = 0;
4806         else
4807                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4808         adev->gfx.xcc_mask = 1;
4809         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4810                                           AMDGPU_MAX_COMPUTE_RINGS);
4811         gfx_v9_0_set_kiq_pm4_funcs(adev);
4812         gfx_v9_0_set_ring_funcs(adev);
4813         gfx_v9_0_set_irq_funcs(adev);
4814         gfx_v9_0_set_gds_init(adev);
4815         gfx_v9_0_set_rlc_funcs(adev);
4816
4817         /* init rlcg reg access ctrl */
4818         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4819
4820         return gfx_v9_0_init_microcode(adev);
4821 }
4822
4823 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4824 {
4825         struct amdgpu_device *adev = ip_block->adev;
4826         int r;
4827
4828         /*
4829          * Temp workaround for an issue where CP firmware fails to update
4830          * the read pointer when CPDMA writes the GDS clearing operation
4831          * during the suspend/resume sequence on several cards, so limit
4832          * this operation to the cold boot sequence.
4833          */
4834         if ((!adev->in_suspend) &&
4835             (adev->gds.gds_size)) {
4836                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4837                 if (r)
4838                         return r;
4839         }
4840
4841         /* requires IBs so do in late init after IB pool is initialized */
4842         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4843                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4844         else
4845                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4846
4847         if (r)
4848                 return r;
4849
4850         if (adev->gfx.ras &&
4851             adev->gfx.ras->enable_watchdog_timer)
4852                 adev->gfx.ras->enable_watchdog_timer(adev);
4853
4854         return 0;
4855 }
4856
4857 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4858 {
4859         struct amdgpu_device *adev = ip_block->adev;
4860         int r;
4861
4862         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4863         if (r)
4864                 return r;
4865
4866         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4867         if (r)
4868                 return r;
4869
4870         r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4871         if (r)
4872                 return r;
4873
4874         r = gfx_v9_0_ecc_late_init(ip_block);
4875         if (r)
4876                 return r;
4877
4878         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4879                 gfx_v9_4_2_debug_trap_config_init(adev,
4880                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4881         else
4882                 gfx_v9_0_debug_trap_config_init(adev,
4883                         adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4884
4885         return 0;
4886 }
4887
4888 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4889 {
4890         uint32_t rlc_setting;
4891
4892         /* if RLC is not enabled, do nothing */
4893         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4894         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4895                 return false;
4896
4897         return true;
4898 }
4899
4900 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4901 {
4902         uint32_t data;
4903         unsigned i;
4904
4905         data = RLC_SAFE_MODE__CMD_MASK;
4906         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4907         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4908
4909         /* wait for RLC_SAFE_MODE */
4910         for (i = 0; i < adev->usec_timeout; i++) {
4911                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4912                         break;
4913                 udelay(1);
4914         }
4915 }
4916
4917 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4918 {
4919         uint32_t data;
4920
4921         data = RLC_SAFE_MODE__CMD_MASK;
4922         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4923 }
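/*
 * Illustrative note (a sketch of the handshake as implemented above, not an
 * authoritative RLC description): entering safe mode writes RLC_SAFE_MODE
 * with CMD = 1 and MESSAGE = 1, then polls until the RLC acknowledges the
 * request by clearing the CMD bit; leaving safe mode writes CMD = 1 with
 * MESSAGE = 0 and does not wait for an acknowledgement.
 */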
4924
4925 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4926                                                 bool enable)
4927 {
4928         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4929
4930         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4931                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4932                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4933                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4934         } else {
4935                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4936                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4937                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4938         }
4939
4940         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4941 }
4942
4943 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4944                                                 bool enable)
4945 {
4946         /* TODO: double check if we need to perform this under safe mode */
4947         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4948
4949         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4950                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4951         else
4952                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4953
4954         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4955                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4956         else
4957                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4958
4959         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4960 }
4961
4962 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4963                                                       bool enable)
4964 {
4965         uint32_t data, def;
4966
4967         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4968
4969         /* It is disabled by HW by default */
4970         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4971                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4972                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4973
4974                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4975                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4976
4977                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4978                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4979                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4980
4981                 /* only for Vega10 & Raven1 */
4982                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4983
4984                 if (def != data)
4985                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4986
4987                 /* MGLS is a global flag to control all MGLS in GFX */
4988                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4989                         /* 2 - RLC memory Light sleep */
4990                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4991                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4992                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4993                                 if (def != data)
4994                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4995                         }
4996                         /* 3 - CP memory Light sleep */
4997                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4998                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4999                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5000                                 if (def != data)
5001                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5002                         }
5003                 }
5004         } else {
5005                 /* 1 - MGCG_OVERRIDE */
5006                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5007
5008                 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
5009                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5010
5011                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5012                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5013                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5014                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5015
5016                 if (def != data)
5017                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5018
5019                 /* 2 - disable MGLS in RLC */
5020                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5021                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5022                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5023                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5024                 }
5025
5026                 /* 3 - disable MGLS in CP */
5027                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5028                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5029                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5030                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5031                 }
5032         }
5033
5034         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5035 }
5036
5037 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5038                                            bool enable)
5039 {
5040         uint32_t data, def;
5041
5042         if (!adev->gfx.num_gfx_rings)
5043                 return;
5044
5045         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5046
5047         /* Enable 3D CGCG/CGLS */
5048         if (enable) {
5049                 /* write cmd to clear cgcg/cgls ov */
5050                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5051                 /* unset CGCG override */
5052                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5053                 /* update CGCG and CGLS override bits */
5054                 if (def != data)
5055                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5056
5057                 /* enable 3D CGCG FSM (0x0000363f) */
5058                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5059
5060                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5061                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5062                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5063                 else
5064                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5065
5066                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5067                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5068                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5069                 if (def != data)
5070                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5071
5072                 /* set IDLE_POLL_COUNT(0x00900100) */
5073                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5074                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5075                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5076                 if (def != data)
5077                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5078         } else {
5079                 /* Disable CGCG/CGLS */
5080                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5081                 /* disable cgcg, cgls should be disabled */
5082                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5083                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5084                 /* disable cgcg and cgls in FSM */
5085                 if (def != data)
5086                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5087         }
5088
5089         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5090 }
5091
5092 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5093                                                       bool enable)
5094 {
5095         uint32_t def, data;
5096
5097         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5098
5099         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5100                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5101                 /* unset CGCG override */
5102                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5103                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5104                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5105                 else
5106                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5107                 /* update CGCG and CGLS override bits */
5108                 if (def != data)
5109                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5110
5111                 /* enable cgcg FSM(0x0000363F) */
5112                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5113
5114                 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5115                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5116                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5117                 else
5118                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5119                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5120                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5121                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5122                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5123                 if (def != data)
5124                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5125
5126                 /* set IDLE_POLL_COUNT(0x00900100) */
5127                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5128                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5129                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5130                 if (def != data)
5131                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5132         } else {
5133                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5134                 /* reset CGCG/CGLS bits */
5135                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5136                 /* disable cgcg and cgls in FSM */
5137                 if (def != data)
5138                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5139         }
5140
5141         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5142 }
5143
5144 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5145                                             bool enable)
5146 {
5147         if (enable) {
5148                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5149                  * ===  MGCG + MGLS ===
5150                  */
5151                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5152                 /* ===  CGCG /CGLS for GFX 3D Only === */
5153                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5154                 /* ===  CGCG + CGLS === */
5155                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5156         } else {
5157                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5158                  * ===  CGCG + CGLS ===
5159                  */
5160                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5161                 /* ===  CGCG /CGLS for GFX 3D Only === */
5162                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5163                 /* ===  MGCG + MGLS === */
5164                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5165         }
5166         return 0;
5167 }
5168
5169 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5170                                               unsigned int vmid)
5171 {
5172         u32 reg, data;
5173
5174         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5175         if (amdgpu_sriov_is_pp_one_vf(adev))
5176                 data = RREG32_NO_KIQ(reg);
5177         else
5178                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5179
5180         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5181         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5182
5183         if (amdgpu_sriov_is_pp_one_vf(adev))
5184                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5185         else
5186                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5187 }
5188
5189 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5190 {
5191         amdgpu_gfx_off_ctrl(adev, false);
5192
5193         gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5194
5195         amdgpu_gfx_off_ctrl(adev, true);
5196 }
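/*
 * Illustrative note (a sketch of the access pattern above): GFXOFF is
 * disallowed around the update so RLC_SPM_MC_CNTL can be read and written
 * while the GFX block is powered, and under SR-IOV with a single VF the
 * register is accessed with the NO_KIQ variants to avoid a KIQ round-trip.
 */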
5197
5198 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5199                                         uint32_t offset,
5200                                         struct soc15_reg_rlcg *entries, int arr_size)
5201 {
5202         int i;
5203         uint32_t reg;
5204
5205         if (!entries)
5206                 return false;
5207
5208         for (i = 0; i < arr_size; i++) {
5209                 const struct soc15_reg_rlcg *entry;
5210
5211                 entry = &entries[i];
5212                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5213                 if (offset == reg)
5214                         return true;
5215         }
5216
5217         return false;
5218 }
5219
5220 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5221 {
5222         return gfx_v9_0_check_rlcg_range(adev, offset,
5223                                         (void *)rlcg_access_gc_9_0,
5224                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5225 }
5226
5227 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5228         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5229         .set_safe_mode = gfx_v9_0_set_safe_mode,
5230         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5231         .init = gfx_v9_0_rlc_init,
5232         .get_csb_size = gfx_v9_0_get_csb_size,
5233         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5234         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5235         .resume = gfx_v9_0_rlc_resume,
5236         .stop = gfx_v9_0_rlc_stop,
5237         .reset = gfx_v9_0_rlc_reset,
5238         .start = gfx_v9_0_rlc_start,
5239         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5240         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5241 };
5242
5243 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5244                                           enum amd_powergating_state state)
5245 {
5246         struct amdgpu_device *adev = ip_block->adev;
5247         bool enable = (state == AMD_PG_STATE_GATE);
5248
5249         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5250         case IP_VERSION(9, 2, 2):
5251         case IP_VERSION(9, 1, 0):
5252         case IP_VERSION(9, 3, 0):
5253                 if (!enable)
5254                         amdgpu_gfx_off_ctrl(adev, false);
5255
5256                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5257                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5258                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5259                 } else {
5260                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5261                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5262                 }
5263
5264                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5265                         gfx_v9_0_enable_cp_power_gating(adev, true);
5266                 else
5267                         gfx_v9_0_enable_cp_power_gating(adev, false);
5268
5269                 /* update gfx cgpg state */
5270                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5271
5272                 /* update mgcg state */
5273                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5274
5275                 if (enable)
5276                         amdgpu_gfx_off_ctrl(adev, true);
5277                 break;
5278         case IP_VERSION(9, 2, 1):
5279                 amdgpu_gfx_off_ctrl(adev, enable);
5280                 break;
5281         default:
5282                 break;
5283         }
5284
5285         return 0;
5286 }
5287
5288 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5289                                           enum amd_clockgating_state state)
5290 {
5291         struct amdgpu_device *adev = ip_block->adev;
5292
5293         if (amdgpu_sriov_vf(adev))
5294                 return 0;
5295
5296         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5297         case IP_VERSION(9, 0, 1):
5298         case IP_VERSION(9, 2, 1):
5299         case IP_VERSION(9, 4, 0):
5300         case IP_VERSION(9, 2, 2):
5301         case IP_VERSION(9, 1, 0):
5302         case IP_VERSION(9, 4, 1):
5303         case IP_VERSION(9, 3, 0):
5304         case IP_VERSION(9, 4, 2):
5305                 gfx_v9_0_update_gfx_clock_gating(adev,
5306                                                  state == AMD_CG_STATE_GATE);
5307                 break;
5308         default:
5309                 break;
5310         }
5311         return 0;
5312 }
5313
5314 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5315 {
5316         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5317         int data;
5318
5319         if (amdgpu_sriov_vf(adev))
5320                 *flags = 0;
5321
5322         /* AMD_CG_SUPPORT_GFX_MGCG */
5323         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5324         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5325                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5326
5327         /* AMD_CG_SUPPORT_GFX_CGCG */
5328         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5329         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5330                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5331
5332         /* AMD_CG_SUPPORT_GFX_CGLS */
5333         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5334                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5335
5336         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5337         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5338         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5339                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5340
5341         /* AMD_CG_SUPPORT_GFX_CP_LS */
5342         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5343         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5344                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5345
5346         if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5347                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5348                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5349                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5350                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5351
5352                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5353                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5354                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5355         }
5356 }
5357
5358 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5359 {
5360         return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5361 }
5362
5363 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5364 {
5365         struct amdgpu_device *adev = ring->adev;
5366         u64 wptr;
5367
5368         /* XXX check if swapping is necessary on BE */
5369         if (ring->use_doorbell) {
5370                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5371         } else {
5372                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5373                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5374         }
5375
5376         return wptr;
5377 }
5378
5379 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5380 {
5381         struct amdgpu_device *adev = ring->adev;
5382
5383         if (ring->use_doorbell) {
5384                 /* XXX check if swapping is necessary on BE */
5385                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5386                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5387         } else {
5388                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5389                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5390         }
5391 }
5392
5393 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5394 {
5395         struct amdgpu_device *adev = ring->adev;
5396         u32 ref_and_mask, reg_mem_engine;
5397         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5398
5399         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5400                 switch (ring->me) {
5401                 case 1:
5402                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5403                         break;
5404                 case 2:
5405                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5406                         break;
5407                 default:
5408                         return;
5409                 }
5410                 reg_mem_engine = 0;
5411         } else {
5412                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5413                 reg_mem_engine = 1; /* pfp */
5414         }
5415
5416         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5417                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5418                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5419                               ref_and_mask, ref_and_mask, 0x20);
5420 }
5421
5422 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5423                                         struct amdgpu_job *job,
5424                                         struct amdgpu_ib *ib,
5425                                         uint32_t flags)
5426 {
5427         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5428         u32 header, control = 0;
5429
5430         if (ib->flags & AMDGPU_IB_FLAG_CE)
5431                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5432         else
5433                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5434
5435         control |= ib->length_dw | (vmid << 24);
5436
5437         if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5438                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5439
5440                 if (flags & AMDGPU_IB_PREEMPTED)
5441                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5442
5443                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5444                         gfx_v9_0_ring_emit_de_meta(ring,
5445                                                    (!amdgpu_sriov_vf(ring->adev) &&
5446                                                    flags & AMDGPU_IB_PREEMPTED) ?
5447                                                    true : false,
5448                                                    job->gds_size > 0 && job->gds_base != 0);
5449         }
5450
5451         amdgpu_ring_write(ring, header);
5452         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5453         amdgpu_ring_write(ring,
5454 #ifdef __BIG_ENDIAN
5455                 (2 << 0) |
5456 #endif
5457                 lower_32_bits(ib->gpu_addr));
5458         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5459         amdgpu_ring_ib_on_emit_cntl(ring);
5460         amdgpu_ring_write(ring, control);
5461 }
5462
5463 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5464                                      unsigned offset)
5465 {
5466         u32 control = ring->ring[offset];
5467
5468         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5469         ring->ring[offset] = control;
5470 }
5471
5472 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5473                                         unsigned offset)
5474 {
5475         struct amdgpu_device *adev = ring->adev;
5476         void *ce_payload_cpu_addr;
5477         uint64_t payload_offset, payload_size;
5478
5479         payload_size = sizeof(struct v9_ce_ib_state);
5480
5481         if (ring->is_mes_queue) {
5482                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5483                                           gfx[0].gfx_meta_data) +
5484                         offsetof(struct v9_gfx_meta_data, ce_payload);
5485                 ce_payload_cpu_addr =
5486                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5487         } else {
5488                 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5489                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5490         }
5491
5492         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5493                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5494         } else {
5495                 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5496                        (ring->buf_mask + 1 - offset) << 2);
5497                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5498                 memcpy((void *)&ring->ring[0],
5499                        ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5500                        payload_size);
5501         }
5502 }
5503
5504 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5505                                         unsigned offset)
5506 {
5507         struct amdgpu_device *adev = ring->adev;
5508         void *de_payload_cpu_addr;
5509         uint64_t payload_offset, payload_size;
5510
5511         payload_size = sizeof(struct v9_de_ib_state);
5512
5513         if (ring->is_mes_queue) {
5514                 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5515                                           gfx[0].gfx_meta_data) +
5516                         offsetof(struct v9_gfx_meta_data, de_payload);
5517                 de_payload_cpu_addr =
5518                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5519         } else {
5520                 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5521                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5522         }
5523
5524         ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5525                 IB_COMPLETION_STATUS_PREEMPTED;
5526
5527         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5528                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5529         } else {
5530                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5531                        (ring->buf_mask + 1 - offset) << 2);
5532                 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5533                 memcpy((void *)&ring->ring[0],
5534                        de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5535                        payload_size);
5536         }
5537 }
5538
5539 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5540                                           struct amdgpu_job *job,
5541                                           struct amdgpu_ib *ib,
5542                                           uint32_t flags)
5543 {
5544         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5545         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5546
5547         /* Currently, there is a high likelihood of a wave ID mismatch
5548          * between ME and GDS, leading to a hw deadlock, because ME generates
5549          * different wave IDs than the GDS expects. This situation happens
5550          * randomly when at least 5 compute pipes use GDS ordered append.
5551          * The wave IDs generated by ME are also wrong after suspend/resume.
5552          * Those are probably bugs somewhere else in the kernel driver.
5553          *
5554          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5555          * GDS to 0 for this ring (me/pipe).
5556          */
5557         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5558                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5559                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5560                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5561         }
5562
5563         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5564         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5565         amdgpu_ring_write(ring,
5566 #ifdef __BIG_ENDIAN
5567                                 (2 << 0) |
5568 #endif
5569                                 lower_32_bits(ib->gpu_addr));
5570         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5571         amdgpu_ring_write(ring, control);
5572 }
5573
5574 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5575                                      u64 seq, unsigned flags)
5576 {
5577         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5578         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5579         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5580         bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5581         uint32_t dw2 = 0;
5582
5583         /* RELEASE_MEM - flush caches, send int */
5584         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5585
5586         if (writeback) {
5587                 dw2 = EOP_TC_NC_ACTION_EN;
5588         } else {
5589                 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5590                                 EOP_TC_MD_ACTION_EN;
5591         }
5592         dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5593                                 EVENT_INDEX(5);
5594         if (exec)
5595                 dw2 |= EOP_EXEC;
5596
5597         amdgpu_ring_write(ring, dw2);
5598         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5599
5600         /*
5601          * the address should be Qword aligned for a 64bit write, Dword
5602          * aligned if only the low 32 bits are sent (data high is discarded)
5603          */
5604         if (write64bit)
5605                 BUG_ON(addr & 0x7);
5606         else
5607                 BUG_ON(addr & 0x3);
5608         amdgpu_ring_write(ring, lower_32_bits(addr));
5609         amdgpu_ring_write(ring, upper_32_bits(addr));
5610         amdgpu_ring_write(ring, lower_32_bits(seq));
5611         amdgpu_ring_write(ring, upper_32_bits(seq));
5612         amdgpu_ring_write(ring, 0);
5613 }
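/*
 * Illustrative sketch of the RELEASE_MEM fence emitted above (derived from
 * the code, using the field names from this file):
 *
 *   dw0: PACKET3(PACKET3_RELEASE_MEM, 6)
 *   dw1: cache actions (TC_NC for TC_WB_ONLY fences, else TCL1/TC/TC_MD) |
 *        EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
 *        EVENT_INDEX(5) [| EOP_EXEC for preemption trailing fences]
 *   dw2: DATA_SEL(2 for a 64-bit seq, 1 for 32-bit) | INT_SEL(2 if an
 *        interrupt is requested)
 *   dw3: fence address low        dw4: fence address high
 *   dw5: sequence number low      dw6: sequence number high
 *   dw7: 0
 */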
5614
5615 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5616 {
5617         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5618         uint32_t seq = ring->fence_drv.sync_seq;
5619         uint64_t addr = ring->fence_drv.gpu_addr;
5620
5621         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5622                               lower_32_bits(addr), upper_32_bits(addr),
5623                               seq, 0xffffffff, 4);
5624 }
5625
5626 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5627                                         unsigned vmid, uint64_t pd_addr)
5628 {
5629         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5630
5631         /* compute doesn't have PFP */
5632         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5633                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5634                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5635                 amdgpu_ring_write(ring, 0x0);
5636         }
5637 }
5638
5639 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5640 {
5641         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5642 }
5643
5644 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5645 {
5646         u64 wptr;
5647
5648         /* XXX check if swapping is necessary on BE */
5649         if (ring->use_doorbell)
5650                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5651         else
5652                 BUG();
5653         return wptr;
5654 }
5655
5656 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5657 {
5658         struct amdgpu_device *adev = ring->adev;
5659
5660         /* XXX check if swapping is necessary on BE */
5661         if (ring->use_doorbell) {
5662                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5663                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5664         } else {
5665                 BUG(); /* only DOORBELL method supported on gfx9 now */
5666         }
5667 }
5668
5669 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5670                                          u64 seq, unsigned int flags)
5671 {
5672         struct amdgpu_device *adev = ring->adev;
5673
5674         /* we only allocate 32bit for each seq wb address */
5675         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5676
5677         /* write fence seq to the "addr" */
5678         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5679         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5680                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5681         amdgpu_ring_write(ring, lower_32_bits(addr));
5682         amdgpu_ring_write(ring, upper_32_bits(addr));
5683         amdgpu_ring_write(ring, lower_32_bits(seq));
5684
5685         if (flags & AMDGPU_FENCE_FLAG_INT) {
5686                 /* set register to trigger INT */
5687                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5688                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5689                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5690                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5691                 amdgpu_ring_write(ring, 0);
5692                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5693         }
5694 }
5695
5696 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5697 {
5698         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5699         amdgpu_ring_write(ring, 0);
5700 }
5701
5702 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5703 {
5704         struct amdgpu_device *adev = ring->adev;
5705         struct v9_ce_ib_state ce_payload = {0};
5706         uint64_t offset, ce_payload_gpu_addr;
5707         void *ce_payload_cpu_addr;
5708         int cnt;
5709
5710         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5711
5712         if (ring->is_mes_queue) {
5713                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5714                                   gfx[0].gfx_meta_data) +
5715                         offsetof(struct v9_gfx_meta_data, ce_payload);
5716                 ce_payload_gpu_addr =
5717                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5718                 ce_payload_cpu_addr =
5719                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5720         } else {
5721                 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5722                 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5723                 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5724         }
5725
5726         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5727         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5728                                  WRITE_DATA_DST_SEL(8) |
5729                                  WR_CONFIRM) |
5730                                  WRITE_DATA_CACHE_POLICY(0));
5731         amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5732         amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5733
5734         amdgpu_ring_ib_on_emit_ce(ring);
5735
5736         if (resume)
5737                 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5738                                            sizeof(ce_payload) >> 2);
5739         else
5740                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5741                                            sizeof(ce_payload) >> 2);
5742 }
5743
5744 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5745 {
5746         int i, r = 0;
5747         struct amdgpu_device *adev = ring->adev;
5748         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5749         struct amdgpu_ring *kiq_ring = &kiq->ring;
5750         unsigned long flags;
5751
5752         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5753                 return -EINVAL;
5754
5755         spin_lock_irqsave(&kiq->ring_lock, flags);
5756
5757         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5758                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5759                 return -ENOMEM;
5760         }
5761
5762         /* assert preemption condition */
5763         amdgpu_ring_set_preempt_cond_exec(ring, false);
5764
5765         ring->trail_seq += 1;
5766         amdgpu_ring_alloc(ring, 13);
5767         gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5768                                  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5769
5770         /* assert IB preemption, emit the trailing fence */
5771         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5772                                    ring->trail_fence_gpu_addr,
5773                                    ring->trail_seq);
5774
5775         amdgpu_ring_commit(kiq_ring);
5776         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5777
5778         /* poll the trailing fence */
5779         for (i = 0; i < adev->usec_timeout; i++) {
5780                 if (ring->trail_seq ==
5781                         le32_to_cpu(*ring->trail_fence_cpu_addr))
5782                         break;
5783                 udelay(1);
5784         }
5785
5786         if (i >= adev->usec_timeout) {
5787                 r = -EINVAL;
5788                 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5789         }
5790
5791         /* reset CP_VMID_PREEMPT after the trailing fence */
5792         amdgpu_ring_emit_wreg(ring,
5793                               SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5794                               0x0);
5795         amdgpu_ring_commit(ring);
5796
5797         /* deassert preemption condition */
5798         amdgpu_ring_set_preempt_cond_exec(ring, true);
5799         return r;
5800 }
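/*
 * Illustrative summary of the mid-IB preemption sequence above (a sketch of
 * what this function does, not additional behaviour): the preemption
 * condition is asserted via amdgpu_ring_set_preempt_cond_exec(ring, false),
 * a trailing fence carrying AMDGPU_FENCE_FLAG_EXEC is emitted on the gfx
 * ring, the KIQ is asked to preempt the queue without unmapping it
 * (PREEMPT_QUEUES_NO_UNMAP), and the driver busy-waits for the trailing
 * sequence number to land in memory.  Whether or not that wait succeeds,
 * CP_VMID_PREEMPT is cleared and the preemption condition is de-asserted so
 * normal submission can resume.
 */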
5801
5802 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5803 {
5804         struct amdgpu_device *adev = ring->adev;
5805         struct v9_de_ib_state de_payload = {0};
5806         uint64_t offset, gds_addr, de_payload_gpu_addr;
5807         void *de_payload_cpu_addr;
5808         int cnt;
5809
5810         if (ring->is_mes_queue) {
5811                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5812                                   gfx[0].gfx_meta_data) +
5813                         offsetof(struct v9_gfx_meta_data, de_payload);
5814                 de_payload_gpu_addr =
5815                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5816                 de_payload_cpu_addr =
5817                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5818
5819                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5820                                   gfx[0].gds_backup) +
5821                         offsetof(struct v9_gfx_meta_data, de_payload);
5822                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5823         } else {
5824                 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5825                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5826                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5827
5828                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5829                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5830                                  PAGE_SIZE);
5831         }
5832
5833         if (usegds) {
5834                 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5835                 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5836         }
5837
5838         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5839         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5840         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5841                                  WRITE_DATA_DST_SEL(8) |
5842                                  WR_CONFIRM) |
5843                                  WRITE_DATA_CACHE_POLICY(0));
5844         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5845         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5846
5847         amdgpu_ring_ib_on_emit_de(ring);
5848         if (resume)
5849                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5850                                            sizeof(de_payload) >> 2);
5851         else
5852                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5853                                            sizeof(de_payload) >> 2);
5854 }
5855
5856 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5857                                    bool secure)
5858 {
5859         uint32_t v = secure ? FRAME_TMZ : 0;
5860
5861         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5862         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5863 }
5864
5865 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5866 {
5867         uint32_t dw2 = 0;
5868
5869         gfx_v9_0_ring_emit_ce_meta(ring,
5870                                    (!amdgpu_sriov_vf(ring->adev) &&
5871                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
5872
5873         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5874         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5875                 /* set load_global_config & load_global_uconfig */
5876                 dw2 |= 0x8001;
5877                 /* set load_cs_sh_regs */
5878                 dw2 |= 0x01000000;
5879                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5880                 dw2 |= 0x10002;
5881
5882                 /* set load_ce_ram if preamble presented */
5883                 /* set load_ce_ram if a preamble is present */
5884                         dw2 |= 0x10000000;
5885         } else {
5886                 /* still load_ce_ram if this is the first time a preamble is presented,
5887                  * even though no context switch happens.
5888                  */
5889                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5890                         dw2 |= 0x10000000;
5891         }
5892
5893         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5894         amdgpu_ring_write(ring, dw2);
5895         amdgpu_ring_write(ring, 0);
5896 }
5897
5898 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5899                                                   uint64_t addr)
5900 {
5901         unsigned ret;
5902         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5903         amdgpu_ring_write(ring, lower_32_bits(addr));
5904         amdgpu_ring_write(ring, upper_32_bits(addr));
5905         /* discard following DWs if *cond_exec_gpu_addr==0 */
5906         amdgpu_ring_write(ring, 0);
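             /* Remember the ring offset of the dummy dword written just below so
              * it can be patched with the real packet count later.
              */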
5907         ret = ring->wptr & ring->buf_mask;
5908         /* patch dummy value later */
5909         amdgpu_ring_write(ring, 0);
5910         return ret;
5911 }
5912
5913 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5914                                     uint32_t reg_val_offs)
5915 {
5916         struct amdgpu_device *adev = ring->adev;
5917
5918         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5919         amdgpu_ring_write(ring, 0 |     /* src: register */
5920                                 (5 << 8) |      /* dst: memory */
5921                                 (1 << 20));     /* write confirm */
5922         amdgpu_ring_write(ring, reg);
5923         amdgpu_ring_write(ring, 0);
5924         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5925                                 reg_val_offs * 4));
5926         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5927                                 reg_val_offs * 4));
5928 }
5929
5930 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5931                                     uint32_t val)
5932 {
5933         uint32_t cmd = 0;
5934
5935         switch (ring->funcs->type) {
5936         case AMDGPU_RING_TYPE_GFX:
5937                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5938                 break;
5939         case AMDGPU_RING_TYPE_KIQ:
5940                 cmd = (1 << 16); /* no inc addr */
5941                 break;
5942         default:
5943                 cmd = WR_CONFIRM;
5944                 break;
5945         }
5946         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5947         amdgpu_ring_write(ring, cmd);
5948         amdgpu_ring_write(ring, reg);
5949         amdgpu_ring_write(ring, 0);
5950         amdgpu_ring_write(ring, val);
5951 }
5952
5953 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5954                                         uint32_t val, uint32_t mask)
5955 {
5956         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5957 }
5958
5959 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5960                                                   uint32_t reg0, uint32_t reg1,
5961                                                   uint32_t ref, uint32_t mask)
5962 {
5963         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5964         struct amdgpu_device *adev = ring->adev;
5965         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5966                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5967
5968         if (fw_version_ok)
5969                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5970                                       ref, mask, 0x20);
5971         else
5972                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5973                                                            ref, mask);
5974 }
5975
5976 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5977 {
5978         struct amdgpu_device *adev = ring->adev;
5979         uint32_t value = 0;
5980
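             /* Build an SQ_CMD request limited to waves of the given VMID
              * (CMD 0x3 is presumably the wave-kill request) and issue it while
              * the RLC is held in safe mode.
              */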
5981         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5982         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5983         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5984         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5985         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5986         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5987         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5988 }
5989
5990 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5991                                                  enum amdgpu_interrupt_state state)
5992 {
5993         switch (state) {
5994         case AMDGPU_IRQ_STATE_DISABLE:
5995         case AMDGPU_IRQ_STATE_ENABLE:
5996                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5997                                TIME_STAMP_INT_ENABLE,
5998                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5999                 break;
6000         default:
6001                 break;
6002         }
6003 }
6004
6005 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6006                                                      int me, int pipe,
6007                                                      enum amdgpu_interrupt_state state)
6008 {
6009         u32 mec_int_cntl, mec_int_cntl_reg;
6010
6011         /*
6012          * amdgpu controls only the first MEC. That's why this function only
6013          * handles the setting of interrupts for this specific MEC. All other
6014          * pipes' interrupts are set by amdkfd.
6015          */
6016
6017         if (me == 1) {
6018                 switch (pipe) {
6019                 case 0:
6020                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6021                         break;
6022                 case 1:
6023                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6024                         break;
6025                 case 2:
6026                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6027                         break;
6028                 case 3:
6029                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6030                         break;
6031                 default:
6032                         DRM_DEBUG("invalid pipe %d\n", pipe);
6033                         return;
6034                 }
6035         } else {
6036                 DRM_DEBUG("invalid me %d\n", me);
6037                 return;
6038         }
6039
6040         switch (state) {
6041         case AMDGPU_IRQ_STATE_DISABLE:
6042                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6043                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6044                                              TIME_STAMP_INT_ENABLE, 0);
6045                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6046                 break;
6047         case AMDGPU_IRQ_STATE_ENABLE:
6048                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6049                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6050                                              TIME_STAMP_INT_ENABLE, 1);
6051                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6052                 break;
6053         default:
6054                 break;
6055         }
6056 }
6057
6058 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6059                                      int me, int pipe)
6060 {
6061         /*
6062          * amdgpu controls only the first MEC. That's why this function only
6063          * handles the setting of interrupts for this specific MEC. All other
6064          * pipes' interrupts are set by amdkfd.
6065          */
6066         if (me != 1)
6067                 return 0;
6068
6069         switch (pipe) {
6070         case 0:
6071                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6072         case 1:
6073                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6074         case 2:
6075                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6076         case 3:
6077                 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6078         default:
6079                 return 0;
6080         }
6081 }
6082
6083 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6084                                              struct amdgpu_irq_src *source,
6085                                              unsigned type,
6086                                              enum amdgpu_interrupt_state state)
6087 {
6088         u32 cp_int_cntl_reg, cp_int_cntl;
6089         int i, j;
6090
6091         switch (state) {
6092         case AMDGPU_IRQ_STATE_DISABLE:
6093         case AMDGPU_IRQ_STATE_ENABLE:
6094                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6095                                PRIV_REG_INT_ENABLE,
6096                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6097                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6098                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6099                                 /* MECs start at 1 */
6100                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6101
6102                                 if (cp_int_cntl_reg) {
6103                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6104                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6105                                                                     PRIV_REG_INT_ENABLE,
6106                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6107                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6108                                 }
6109                         }
6110                 }
6111                 break;
6112         default:
6113                 break;
6114         }
6115
6116         return 0;
6117 }
6118
6119 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6120                                            struct amdgpu_irq_src *source,
6121                                            unsigned type,
6122                                            enum amdgpu_interrupt_state state)
6123 {
6124         u32 cp_int_cntl_reg, cp_int_cntl;
6125         int i, j;
6126
6127         switch (state) {
6128         case AMDGPU_IRQ_STATE_DISABLE:
6129         case AMDGPU_IRQ_STATE_ENABLE:
6130                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6131                                OPCODE_ERROR_INT_ENABLE,
6132                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6133                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6134                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6135                                 /* MECs start at 1 */
6136                                 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6137
6138                                 if (cp_int_cntl_reg) {
6139                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6140                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6141                                                                     OPCODE_ERROR_INT_ENABLE,
6142                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6143                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6144                                 }
6145                         }
6146                 }
6147                 break;
6148         default:
6149                 break;
6150         }
6151
6152         return 0;
6153 }
6154
6155 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6156                                               struct amdgpu_irq_src *source,
6157                                               unsigned type,
6158                                               enum amdgpu_interrupt_state state)
6159 {
6160         switch (state) {
6161         case AMDGPU_IRQ_STATE_DISABLE:
6162         case AMDGPU_IRQ_STATE_ENABLE:
6163                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6164                                PRIV_INSTR_INT_ENABLE,
6165                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6166                 break;
6167         default:
6168                 break;
6169         }
6170
6171         return 0;
6172 }
6173
6174 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
6175         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6176                         CP_ECC_ERROR_INT_ENABLE, 1)
6177
6178 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
6179         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6180                         CP_ECC_ERROR_INT_ENABLE, 0)
6181
6182 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6183                                               struct amdgpu_irq_src *source,
6184                                               unsigned type,
6185                                               enum amdgpu_interrupt_state state)
6186 {
6187         switch (state) {
6188         case AMDGPU_IRQ_STATE_DISABLE:
6189                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6190                                 CP_ECC_ERROR_INT_ENABLE, 0);
6191                 DISABLE_ECC_ON_ME_PIPE(1, 0);
6192                 DISABLE_ECC_ON_ME_PIPE(1, 1);
6193                 DISABLE_ECC_ON_ME_PIPE(1, 2);
6194                 DISABLE_ECC_ON_ME_PIPE(1, 3);
6195                 break;
6196
6197         case AMDGPU_IRQ_STATE_ENABLE:
6198                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6199                                 CP_ECC_ERROR_INT_ENABLE, 1);
6200                 ENABLE_ECC_ON_ME_PIPE(1, 0);
6201                 ENABLE_ECC_ON_ME_PIPE(1, 1);
6202                 ENABLE_ECC_ON_ME_PIPE(1, 2);
6203                 ENABLE_ECC_ON_ME_PIPE(1, 3);
6204                 break;
6205         default:
6206                 break;
6207         }
6208
6209         return 0;
6210 }
6211
6212
6213 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6214                                             struct amdgpu_irq_src *src,
6215                                             unsigned type,
6216                                             enum amdgpu_interrupt_state state)
6217 {
6218         switch (type) {
6219         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6220                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6221                 break;
6222         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6223                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6224                 break;
6225         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6226                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6227                 break;
6228         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6229                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6230                 break;
6231         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6232                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6233                 break;
6234         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6235                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6236                 break;
6237         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6238                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6239                 break;
6240         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6241                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6242                 break;
6243         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6244                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6245                 break;
6246         default:
6247                 break;
6248         }
6249         return 0;
6250 }
6251
6252 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6253                             struct amdgpu_irq_src *source,
6254                             struct amdgpu_iv_entry *entry)
6255 {
6256         int i;
6257         u8 me_id, pipe_id, queue_id;
6258         struct amdgpu_ring *ring;
6259
6260         DRM_DEBUG("IH: CP EOP\n");
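             /* ring_id encodes the source queue: pipe in bits [1:0], ME in
              * bits [3:2], queue in bits [6:4].
              */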
6261         me_id = (entry->ring_id & 0x0c) >> 2;
6262         pipe_id = (entry->ring_id & 0x03) >> 0;
6263         queue_id = (entry->ring_id & 0x70) >> 4;
6264
6265         switch (me_id) {
6266         case 0:
6267                 if (adev->gfx.num_gfx_rings) {
6268                         if (!adev->gfx.mcbp) {
6269                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6270                         } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6271                                 /* Fence signals are handled on the software rings */
6272                                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6273                                         amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6274                         }
6275                 }
6276                 break;
6277         case 1:
6278         case 2:
6279                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6280                         ring = &adev->gfx.compute_ring[i];
6281                         /* Per-queue interrupt is supported for MEC starting from VI.
6282                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6283                          */
6284                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6285                                 amdgpu_fence_process(ring);
6286                 }
6287                 break;
6288         }
6289         return 0;
6290 }
6291
6292 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6293                            struct amdgpu_iv_entry *entry)
6294 {
6295         u8 me_id, pipe_id, queue_id;
6296         struct amdgpu_ring *ring;
6297         int i;
6298
6299         me_id = (entry->ring_id & 0x0c) >> 2;
6300         pipe_id = (entry->ring_id & 0x03) >> 0;
6301         queue_id = (entry->ring_id & 0x70) >> 4;
6302
6303         switch (me_id) {
6304         case 0:
6305                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6306                 break;
6307         case 1:
6308         case 2:
6309                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6310                         ring = &adev->gfx.compute_ring[i];
6311                         if (ring->me == me_id && ring->pipe == pipe_id &&
6312                             ring->queue == queue_id)
6313                                 drm_sched_fault(&ring->sched);
6314                 }
6315                 break;
6316         }
6317 }
6318
6319 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6320                                  struct amdgpu_irq_src *source,
6321                                  struct amdgpu_iv_entry *entry)
6322 {
6323         DRM_ERROR("Illegal register access in command stream\n");
6324         gfx_v9_0_fault(adev, entry);
6325         return 0;
6326 }
6327
6328 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6329                                struct amdgpu_irq_src *source,
6330                                struct amdgpu_iv_entry *entry)
6331 {
6332         DRM_ERROR("Illegal opcode in command stream\n");
6333         gfx_v9_0_fault(adev, entry);
6334         return 0;
6335 }
6336
6337 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6338                                   struct amdgpu_irq_src *source,
6339                                   struct amdgpu_iv_entry *entry)
6340 {
6341         DRM_ERROR("Illegal instruction in command stream\n");
6342         gfx_v9_0_fault(adev, entry);
6343         return 0;
6344 }
6345
6346
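     /* Table mapping each GFX sub-block to its EDC counter register and to the
      * SEC (correctable) / DED (uncorrectable) bit fields within it; a zero DED
      * mask means the block only reports a single-error-detect count.
      * gfx_v9_0_ras_error_count() walks this table when decoding counter values.
      */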
6347 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6348         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6349           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6350           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6351         },
6352         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6353           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6354           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6355         },
6356         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6357           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6358           0, 0
6359         },
6360         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6361           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6362           0, 0
6363         },
6364         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6365           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6366           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6367         },
6368         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6369           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6370           0, 0
6371         },
6372         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6373           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6374           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6375         },
6376         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6377           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6378           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6379         },
6380         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6381           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6382           0, 0
6383         },
6384         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6385           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6386           0, 0
6387         },
6388         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6389           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6390           0, 0
6391         },
6392         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6393           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6394           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6395         },
6396         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6397           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6398           0, 0
6399         },
6400         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6401           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6402           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6403         },
6404         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6405           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6406           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6407           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6408         },
6409         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6410           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6411           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6412           0, 0
6413         },
6414         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6415           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6416           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6417           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6418         },
6419         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6420           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6421           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6422           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6423         },
6424         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6425           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6426           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6427           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6428         },
6429         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6430           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6431           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6432           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6433         },
6434         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6435           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6436           0, 0
6437         },
6438         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6439           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6440           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6441         },
6442         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6443           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6444           0, 0
6445         },
6446         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6447           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6448           0, 0
6449         },
6450         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6451           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6452           0, 0
6453         },
6454         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6455           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6456           0, 0
6457         },
6458         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6459           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6460           0, 0
6461         },
6462         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6463           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6464           0, 0
6465         },
6466         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6467           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6468           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6469         },
6470         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6471           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6472           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6473         },
6474         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6475           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6476           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6477         },
6478         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6479           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6480           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6481         },
6482         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6483           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6484           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6485         },
6486         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6487           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6488           0, 0
6489         },
6490         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6491           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6492           0, 0
6493         },
6494         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6495           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6496           0, 0
6497         },
6498         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6499           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6500           0, 0
6501         },
6502         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6503           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6504           0, 0
6505         },
6506         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6507           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6508           0, 0
6509         },
6510         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6511           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6512           0, 0
6513         },
6514         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6515           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6516           0, 0
6517         },
6518         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6519           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6520           0, 0
6521         },
6522         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6523           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6524           0, 0
6525         },
6526         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6527           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6528           0, 0
6529         },
6530         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6531           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6532           0, 0
6533         },
6534         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6535           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6536           0, 0
6537         },
6538         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6539           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6540           0, 0
6541         },
6542         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6543           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6544           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6545         },
6546         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6547           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6548           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6549         },
6550         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6551           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6552           0, 0
6553         },
6554         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6555           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6556           0, 0
6557         },
6558         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6559           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6560           0, 0
6561         },
6562         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6563           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6564           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6565         },
6566         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6567           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6568           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6569         },
6570         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6571           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6572           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6573         },
6574         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6575           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6576           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6577         },
6578         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6579           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6580           0, 0
6581         },
6582         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6583           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6584           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6585         },
6586         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6587           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6588           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6589         },
6590         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6591           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6592           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6593         },
6594         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6595           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6596           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6597         },
6598         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6599           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6600           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6601         },
6602         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6603           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6604           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6605         },
6606         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6607           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6608           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6609         },
6610         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6611           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6612           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6613         },
6614         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6615           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6616           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6617         },
6618         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6619           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6620           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6621         },
6622         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6623           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6624           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6625         },
6626         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6627           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6628           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6629         },
6630         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6631           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6632           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6633         },
6634         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6635           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6636           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6637         },
6638         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6639           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6640           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6641         },
6642         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6643           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6644           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6645         },
6646         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6647           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6648           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6649         },
6650         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6651           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6652           0, 0
6653         },
6654         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6655           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6656           0, 0
6657         },
6658         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6659           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6660           0, 0
6661         },
6662         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6663           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6664           0, 0
6665         },
6666         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6667           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6668           0, 0
6669         },
6670         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6671           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6672           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6673         },
6674         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6675           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6676           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6677         },
6678         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6679           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6680           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6681         },
6682         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6683           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6684           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6685         },
6686         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6687           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6688           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6689         },
6690         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6691           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6692           0, 0
6693         },
6694         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6695           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6696           0, 0
6697         },
6698         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6699           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6700           0, 0
6701         },
6702         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6703           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6704           0, 0
6705         },
6706         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6707           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6708           0, 0
6709         },
6710         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6711           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6712           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6713         },
6714         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6715           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6716           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6717         },
6718         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6719           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6720           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6721         },
6722         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6723           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6724           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6725         },
6726         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6727           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6728           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6729         },
6730         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6731           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6732           0, 0
6733         },
6734         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6735           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6736           0, 0
6737         },
6738         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6739           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6740           0, 0
6741         },
6742         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6743           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6744           0, 0
6745         },
6746         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6747           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6748           0, 0
6749         },
6750         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6751           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6752           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6753         },
6754         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6755           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6756           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6757         },
6758         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6759           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6760           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6761         },
6762         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6763           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6764           0, 0
6765         },
6766         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6767           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6768           0, 0
6769         },
6770         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6771           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6772           0, 0
6773         },
6774         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6775           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6776           0, 0
6777         },
6778         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6779           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6780           0, 0
6781         },
6782         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6783           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6784           0, 0
6785         }
6786 };
6787
6788 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6789                                      void *inject_if, uint32_t instance_mask)
6790 {
6791         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6792         int ret;
6793         struct ta_ras_trigger_error_input block_info = { 0 };
6794
6795         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6796                 return -EINVAL;
6797
6798         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6799                 return -EINVAL;
6800
6801         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6802                 return -EPERM;
6803
6804         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6805               info->head.type)) {
6806                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6807                         ras_gfx_subblocks[info->head.sub_block_index].name,
6808                         info->head.type);
6809                 return -EPERM;
6810         }
6811
6812         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6813               info->head.type)) {
6814                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6815                         ras_gfx_subblocks[info->head.sub_block_index].name,
6816                         info->head.type);
6817                 return -EPERM;
6818         }
6819
6820         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6821         block_info.sub_block_index =
6822                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6823         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6824         block_info.address = info->address;
6825         block_info.value = info->value;
6826
6827         mutex_lock(&adev->grbm_idx_mutex);
6828         ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6829         mutex_unlock(&adev->grbm_idx_mutex);
6830
6831         return ret;
6832 }
6833
6834 static const char * const vml2_mems[] = {
6835         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6836         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6837         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6838         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6839         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6840         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6841         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6842         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6843         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6844         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6845         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6846         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6847         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6848         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6849         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6850         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6851 };
6852
6853 static const char * const vml2_walker_mems[] = {
6854         "UTC_VML2_CACHE_PDE0_MEM0",
6855         "UTC_VML2_CACHE_PDE0_MEM1",
6856         "UTC_VML2_CACHE_PDE1_MEM0",
6857         "UTC_VML2_CACHE_PDE1_MEM1",
6858         "UTC_VML2_CACHE_PDE2_MEM0",
6859         "UTC_VML2_CACHE_PDE2_MEM1",
6860         "UTC_VML2_RDIF_LOG_FIFO",
6861 };
6862
6863 static const char * const atc_l2_cache_2m_mems[] = {
6864         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6865         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6866         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6867         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6868 };
6869
6870 static const char * const atc_l2_cache_4k_mems[] = {
6871         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6872         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6873         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6874         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6875         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6876         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6877         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6878         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6879         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6880         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6881         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6882         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6883         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6884         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6885         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6886         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6887         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6888         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6889         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6890         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6891         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6892         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6893         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6894         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6895         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6896         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6897         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6898         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6899         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6900         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6901         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6902         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6903 };
6904
6905 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6906                                          struct ras_err_data *err_data)
6907 {
6908         uint32_t i, data;
6909         uint32_t sec_count, ded_count;
6910
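             /* Reset the UTC/ATC EDC counters before sampling; index 255 appears
              * to act as a broadcast select across all sub-block instances.
              */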
6911         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6912         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6913         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6914         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6915         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6916         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6917         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6918         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6919
6920         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6921                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6922                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6923
6924                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6925                 if (sec_count) {
6926                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6927                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6928                         err_data->ce_count += sec_count;
6929                 }
6930
6931                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6932                 if (ded_count) {
6933                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6934                                 "DED %d\n", i, vml2_mems[i], ded_count);
6935                         err_data->ue_count += ded_count;
6936                 }
6937         }
6938
6939         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6940                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6941                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6942
6943                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6944                                                 SEC_COUNT);
6945                 if (sec_count) {
6946                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6947                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6948                         err_data->ce_count += sec_count;
6949                 }
6950
6951                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6952                                                 DED_COUNT);
6953                 if (ded_count) {
6954                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6955                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6956                         err_data->ue_count += ded_count;
6957                 }
6958         }
6959
6960         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6961                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6962                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6963
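                     /* Open-coded field extract: the SEC count occupies bits
                      * [14:13] of the 2M cache EDC_CNT value.
                      */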
6964                 sec_count = (data & 0x00006000L) >> 0xd;
6965                 if (sec_count) {
6966                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6967                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6968                                 sec_count);
6969                         err_data->ce_count += sec_count;
6970                 }
6971         }
6972
6973         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6974                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6975                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6976
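                     /* Open-coded extracts: SEC count in bits [14:13], DED count
                      * in bits [16:15] of the 4K cache EDC_CNT value.
                      */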
6977                 sec_count = (data & 0x00006000L) >> 0xd;
6978                 if (sec_count) {
6979                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6980                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6981                                 sec_count);
6982                         err_data->ce_count += sec_count;
6983                 }
6984
6985                 ded_count = (data & 0x00018000L) >> 0xf;
6986                 if (ded_count) {
6987                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6988                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6989                                 ded_count);
6990                         err_data->ue_count += ded_count;
6991                 }
6992         }
6993
6994         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6995         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6996         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6997         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6998
6999         return 0;
7000 }
7001
7002 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
7003         const struct soc15_reg_entry *reg,
7004         uint32_t se_id, uint32_t inst_id, uint32_t value,
7005         uint32_t *sec_count, uint32_t *ded_count)
7006 {
7007         uint32_t i;
7008         uint32_t sec_cnt, ded_cnt;
7009
7010         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
7011                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
7012                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
7013                         gfx_v9_0_ras_fields[i].inst != reg->inst)
7014                         continue;
7015
7016                 sec_cnt = (value &
7017                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
7018                                 gfx_v9_0_ras_fields[i].sec_count_shift;
7019                 if (sec_cnt) {
7020                         dev_info(adev->dev, "GFX SubBlock %s, "
7021                                 "Instance[%d][%d], SEC %d\n",
7022                                 gfx_v9_0_ras_fields[i].name,
7023                                 se_id, inst_id,
7024                                 sec_cnt);
7025                         *sec_count += sec_cnt;
7026                 }
7027
7028                 ded_cnt = (value &
7029                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
7030                                 gfx_v9_0_ras_fields[i].ded_count_shift;
7031                 if (ded_cnt) {
7032                         dev_info(adev->dev, "GFX SubBlock %s, "
7033                                 "Instance[%d][%d], DED %d\n",
7034                                 gfx_v9_0_ras_fields[i].name,
7035                                 se_id, inst_id,
7036                                 ded_cnt);
7037                         *ded_count += ded_cnt;
7038                 }
7039         }
7040
7041         return 0;
7042 }
7043
7044 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7045 {
7046         int i, j, k;
7047
7048         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7049                 return;
7050
7051         /* read back registers to clear the counters */
7052         mutex_lock(&adev->grbm_idx_mutex);
7053         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7054                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7055                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7056                                 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7057                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7058                         }
7059                 }
7060         }
7061         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7062         mutex_unlock(&adev->grbm_idx_mutex);
7063
7064         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7065         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7066         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7067         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7068         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7069         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7070         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7071         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7072
7073         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7074                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7075                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7076         }
7077
7078         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7079                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7080                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7081         }
7082
7083         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7084                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7085                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7086         }
7087
7088         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7089                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7090                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7091         }
7092
7093         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7094         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7095         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7096         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7097 }
7098
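/*
 * Query correctable (SEC) and uncorrectable (DED) GFX error counts by
 * walking the EDC counter registers for every SE/instance, decoding the
 * per-sub-block fields and accumulating them into ras_error_status,
 * then appending the UTC/VML2/ATC EDC status.
 */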
7099 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7100                                           void *ras_error_status)
7101 {
7102         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7103         uint32_t sec_count = 0, ded_count = 0;
7104         uint32_t i, j, k;
7105         uint32_t reg_value;
7106
7107         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7108                 return;
7109
7110         err_data->ue_count = 0;
7111         err_data->ce_count = 0;
7112
7113         mutex_lock(&adev->grbm_idx_mutex);
7114
7115         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7116                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7117                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7118                                 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7119                                 reg_value =
7120                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7121                                 if (reg_value)
7122                                         gfx_v9_0_ras_error_count(adev,
7123                                                 &gfx_v9_0_edc_counter_regs[i],
7124                                                 j, k, reg_value,
7125                                                 &sec_count, &ded_count);
7126                         }
7127                 }
7128         }
7129
7130         err_data->ce_count += sec_count;
7131         err_data->ue_count += ded_count;
7132
7133         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7134         mutex_unlock(&adev->grbm_idx_mutex);
7135
7136         gfx_v9_0_query_utc_edc_status(adev, err_data);
7137 }
7138
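/*
 * Emit an ACQUIRE_MEM packet that invalidates/writes back the shader
 * instruction and scalar caches and the TC/TCL1 caches over the full
 * address range.
 */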
7139 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7140 {
7141         const unsigned int cp_coher_cntl =
7142                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7143                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7144                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7145                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7146                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7147
7148         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
7149         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7150         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7151         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7152         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7153         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7154         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7155         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7156 }
7157
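/*
 * Program mmSPI_WCL_PIPE_PERCENT_CS<pipe> to either throttle waves on
 * one compute pipe (enable) or restore the default wave limit.
 */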
7158 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7159                                         uint32_t pipe, bool enable)
7160 {
7161         struct amdgpu_device *adev = ring->adev;
7162         uint32_t val;
7163         uint32_t wcl_cs_reg;
7164
7165         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
7166         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7167
7168         switch (pipe) {
7169         case 0:
7170                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7171                 break;
7172         case 1:
7173                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7174                 break;
7175         case 2:
7176                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7177                 break;
7178         case 3:
7179                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7180                 break;
7181         default:
7182                 DRM_DEBUG("invalid pipe %d\n", pipe);
7183                 return;
7184         }
7185
7186         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7187
7188 }
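
/*
 * Throttle the gfx pipe and the sibling compute pipes of the first ME
 * while a high priority compute queue is in use, so that queue gets the
 * bulk of the shader resources; restore the defaults when disabled.
 */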
7189 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7190 {
7191         struct amdgpu_device *adev = ring->adev;
7192         uint32_t val;
7193         int i;
7194
7195
7196         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
7197          * the number of gfx waves. Setting it to 0x1f (5 bits set) makes sure gfx
7198          * only gets around 25% of the gpu resources.
7199          */
7200         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7201         amdgpu_ring_emit_wreg(ring,
7202                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7203                               val);
7204
7205         /* Restrict waves for normal/low priority compute queues as well
7206          * to get the best QoS for high priority compute jobs.
7207          *
7208          * amdgpu controls only the first ME (CS pipes 0-3).
7209          */
7210         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7211                 if (i != ring->pipe)
7212                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7213
7214         }
7215 }
7216
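/*
 * Emit num_nop NOP dwords: a single dword for num_nop == 1, otherwise a
 * PACKET3_NOP header sized for the run (up to 0x3ffe payload dwords)
 * followed by the remaining NOP dwords.
 */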
7217 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7218 {
7219         /* Header itself is a NOP packet */
7220         if (num_nop == 1) {
7221                 amdgpu_ring_write(ring, ring->funcs->nop);
7222                 return;
7223         }
7224
7225         /* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */
7226         amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7227
7228         /* Header is at index 0, followed by num_nop - 1 NOP packets */
7229         amdgpu_ring_insert_nop(ring, num_nop - 1);
7230 }
7231
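/*
 * Reset a hung kernel gfx queue: request a per-VMID reset through the
 * KIQ via mmCP_VMID_RESET, wait for the request bits to clear, clear
 * the register and verify the ring with a ring test.
 */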
7232 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7233 {
7234         struct amdgpu_device *adev = ring->adev;
7235         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7236         struct amdgpu_ring *kiq_ring = &kiq->ring;
7237         unsigned long flags;
7238         u32 tmp;
7239         int r;
7240
7241         if (amdgpu_sriov_vf(adev))
7242                 return -EINVAL;
7243
7244         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7245                 return -EINVAL;
7246
7247         spin_lock_irqsave(&kiq->ring_lock, flags);
7248
7249         if (amdgpu_ring_alloc(kiq_ring, 5)) {
7250                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7251                 return -ENOMEM;
7252         }
7253
7254         tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7255         gfx_v9_0_ring_emit_wreg(kiq_ring,
7256                                  SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7257         amdgpu_ring_commit(kiq_ring);
7258
7259         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7260
7261         r = amdgpu_ring_test_ring(kiq_ring);
7262         if (r)
7263                 return r;
7264
7265         if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7266                 return -ENOMEM;
7267         gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7268                                  ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7269         gfx_v9_0_ring_emit_reg_wait(ring,
7270                                     SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7271         gfx_v9_0_ring_emit_wreg(ring,
7272                                 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7273
7274         return amdgpu_ring_test_ring(ring);
7275 }
7276
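/*
 * Reset a hung kernel compute queue: unmap it through the KIQ, wait for
 * the HQD to go inactive, reinitialize the MQD, remap the queue and
 * verify it with a ring test.
 */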
7277 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7278                               unsigned int vmid)
7279 {
7280         struct amdgpu_device *adev = ring->adev;
7281         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7282         struct amdgpu_ring *kiq_ring = &kiq->ring;
7283         unsigned long flags;
7284         int i, r;
7285
7286         if (amdgpu_sriov_vf(adev))
7287                 return -EINVAL;
7288
7289         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7290                 return -EINVAL;
7291
7292         spin_lock_irqsave(&kiq->ring_lock, flags);
7293
7294         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7295                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7296                 return -ENOMEM;
7297         }
7298
7299         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7300                                    0, 0);
7301         amdgpu_ring_commit(kiq_ring);
7302
7303         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7304
7305         r = amdgpu_ring_test_ring(kiq_ring);
7306         if (r)
7307                 return r;
7308
7309         /* make sure dequeue is complete */
7310         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7311         mutex_lock(&adev->srbm_mutex);
7312         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7313         for (i = 0; i < adev->usec_timeout; i++) {
7314                 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7315                         break;
7316                 udelay(1);
7317         }
7318         if (i >= adev->usec_timeout)
7319                 r = -ETIMEDOUT;
7320         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7321         mutex_unlock(&adev->srbm_mutex);
7322         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7323         if (r) {
7324                 dev_err(adev->dev, "failed to wait for hqd deactivation\n");
7325                 return r;
7326         }
7327
7328         r = amdgpu_bo_reserve(ring->mqd_obj, false);
7329         if (unlikely(r != 0)) {
7330                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
7331                 return r;
7332         }
7333         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7334         if (!r) {
7335                 r = gfx_v9_0_kcq_init_queue(ring, true);
7336                 amdgpu_bo_kunmap(ring->mqd_obj);
7337                 ring->mqd_ptr = NULL;
7338         }
7339         amdgpu_bo_unreserve(ring->mqd_obj);
7340         if (r) {
7341                 dev_err(adev->dev, "failed to reinit kcq mqd\n");
7342                 return r;
7343         }
7344         spin_lock_irqsave(&kiq->ring_lock, flags);
7345         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7346         if (r) {
7347                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7348                 return -ENOMEM;
7349         }
7350         kiq->pmf->kiq_map_queues(kiq_ring, ring);
7351         amdgpu_ring_commit(kiq_ring);
7352         spin_unlock_irqrestore(&kiq->ring_lock, flags);
7353         r = amdgpu_ring_test_ring(kiq_ring);
7354         if (r) {
7355                 DRM_ERROR("failed to remap queue\n");
7356                 return r;
7357         }
7358         return amdgpu_ring_test_ring(ring);
7359 }
7360
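/*
 * Print the previously captured GC register dump (core registers plus
 * the per-mec/pipe/queue compute registers) through the drm printer.
 */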
7361 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7362 {
7363         struct amdgpu_device *adev = ip_block->adev;
7364         uint32_t i, j, k, reg, index = 0;
7365         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7366
7367         if (!adev->gfx.ip_dump_core)
7368                 return;
7369
7370         for (i = 0; i < reg_count; i++)
7371                 drm_printf(p, "%-50s \t 0x%08x\n",
7372                            gc_reg_list_9[i].reg_name,
7373                            adev->gfx.ip_dump_core[i]);
7374
7375         /* print compute queue registers for all instances */
7376         if (!adev->gfx.ip_dump_compute_queues)
7377                 return;
7378
7379         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7380         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7381                    adev->gfx.mec.num_mec,
7382                    adev->gfx.mec.num_pipe_per_mec,
7383                    adev->gfx.mec.num_queue_per_pipe);
7384
7385         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7386                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7387                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7388                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7389                                 for (reg = 0; reg < reg_count; reg++) {
7390                                         drm_printf(p, "%-50s \t 0x%08x\n",
7391                                                    gc_cp_reg_list_9[reg].reg_name,
7392                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
7393                                 }
7394                                 index += reg_count;
7395                         }
7396                 }
7397         }
7398
7399 }
7400
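/*
 * Capture the GC core registers and, with GFXOFF disabled, the CP
 * per-queue registers for every mec/pipe/queue into the ip_dump buffers
 * for later printing by gfx_v9_ip_print().
 */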
7401 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7402 {
7403         struct amdgpu_device *adev = ip_block->adev;
7404         uint32_t i, j, k, reg, index = 0;
7405         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7406
7407         if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7408                 return;
7409
7410         amdgpu_gfx_off_ctrl(adev, false);
7411         for (i = 0; i < reg_count; i++)
7412                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7413         amdgpu_gfx_off_ctrl(adev, true);
7414
7415         /* dump compute queue registers for all instances */
7416         if (!adev->gfx.ip_dump_compute_queues)
7417                 return;
7418
7419         reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7420         amdgpu_gfx_off_ctrl(adev, false);
7421         mutex_lock(&adev->srbm_mutex);
7422         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7423                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7424                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7425                                 /* ME0 is for GFX so start from 1 for CP */
7426                                 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7427
7428                                 for (reg = 0; reg < reg_count; reg++) {
7429                                         adev->gfx.ip_dump_compute_queues[index + reg] =
7430                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
7431                                                         gc_cp_reg_list_9[reg]));
7432                                 }
7433                                 index += reg_count;
7434                         }
7435                 }
7436         }
7437         soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7438         mutex_unlock(&adev->srbm_mutex);
7439         amdgpu_gfx_off_ctrl(adev, true);
7440
7441 }
7442
7443 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7444 {
7445         /* Emit the cleaner shader */
7446         amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7447         amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7448 }
7449
7450 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7451         .name = "gfx_v9_0",
7452         .early_init = gfx_v9_0_early_init,
7453         .late_init = gfx_v9_0_late_init,
7454         .sw_init = gfx_v9_0_sw_init,
7455         .sw_fini = gfx_v9_0_sw_fini,
7456         .hw_init = gfx_v9_0_hw_init,
7457         .hw_fini = gfx_v9_0_hw_fini,
7458         .suspend = gfx_v9_0_suspend,
7459         .resume = gfx_v9_0_resume,
7460         .is_idle = gfx_v9_0_is_idle,
7461         .wait_for_idle = gfx_v9_0_wait_for_idle,
7462         .soft_reset = gfx_v9_0_soft_reset,
7463         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7464         .set_powergating_state = gfx_v9_0_set_powergating_state,
7465         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7466         .dump_ip_state = gfx_v9_ip_dump,
7467         .print_ip_state = gfx_v9_ip_print,
7468 };
7469
7470 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7471         .type = AMDGPU_RING_TYPE_GFX,
7472         .align_mask = 0xff,
7473         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7474         .support_64bit_ptrs = true,
7475         .secure_submission_supported = true,
7476         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7477         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7478         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7479         .emit_frame_size = /* totally 242 maximum if 16 IBs */
7480                 5 +  /* COND_EXEC */
7481                 7 +  /* PIPELINE_SYNC */
7482                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7483                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7484                 2 + /* VM_FLUSH */
7485                 8 +  /* FENCE for VM_FLUSH */
7486                 20 + /* GDS switch */
7487                 4 + /* double SWITCH_BUFFER,
7488                        the first COND_EXEC jump to the place just
7489                            prior to this double SWITCH_BUFFER  */
7490                 5 + /* COND_EXEC */
7491                 7 +      /*     HDP_flush */
7492                 4 +      /*     VGT_flush */
7493                 14 + /* CE_META */
7494                 31 + /* DE_META */
7495                 3 + /* CNTX_CTRL */
7496                 5 + /* HDP_INVL */
7497                 8 + 8 + /* FENCE x2 */
7498                 2 + /* SWITCH_BUFFER */
7499                 7 + /* gfx_v9_0_emit_mem_sync */
7500                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7501         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7502         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7503         .emit_fence = gfx_v9_0_ring_emit_fence,
7504         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7505         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7506         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7507         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7508         .test_ring = gfx_v9_0_ring_test_ring,
7509         .insert_nop = gfx_v9_ring_insert_nop,
7510         .pad_ib = amdgpu_ring_generic_pad_ib,
7511         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7512         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7513         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7514         .preempt_ib = gfx_v9_0_ring_preempt_ib,
7515         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7516         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7517         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7518         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7519         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7520         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7521         .reset = gfx_v9_0_reset_kgq,
7522         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7523         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7524         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7525 };
7526
7527 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7528         .type = AMDGPU_RING_TYPE_GFX,
7529         .align_mask = 0xff,
7530         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7531         .support_64bit_ptrs = true,
7532         .secure_submission_supported = true,
7533         .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7534         .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7535         .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7536         .emit_frame_size = /* totally 242 maximum if 16 IBs */
7537                 5 +  /* COND_EXEC */
7538                 7 +  /* PIPELINE_SYNC */
7539                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7540                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7541                 2 + /* VM_FLUSH */
7542                 8 +  /* FENCE for VM_FLUSH */
7543                 20 + /* GDS switch */
7544                 4 + /* double SWITCH_BUFFER,
7545                      * the first COND_EXEC jump to the place just
7546                      * prior to this double SWITCH_BUFFER
7547                      */
7548                 5 + /* COND_EXEC */
7549                 7 +      /*     HDP_flush */
7550                 4 +      /*     VGT_flush */
7551                 14 + /* CE_META */
7552                 31 + /* DE_META */
7553                 3 + /* CNTX_CTRL */
7554                 5 + /* HDP_INVL */
7555                 8 + 8 + /* FENCE x2 */
7556                 2 + /* SWITCH_BUFFER */
7557                 7 + /* gfx_v9_0_emit_mem_sync */
7558                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7559         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7560         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7561         .emit_fence = gfx_v9_0_ring_emit_fence,
7562         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7563         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7564         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7565         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7566         .test_ring = gfx_v9_0_ring_test_ring,
7567         .test_ib = gfx_v9_0_ring_test_ib,
7568         .insert_nop = gfx_v9_ring_insert_nop,
7569         .pad_ib = amdgpu_ring_generic_pad_ib,
7570         .emit_switch_buffer = gfx_v9_ring_emit_sb,
7571         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7572         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7573         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7574         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7575         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7576         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7577         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7578         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7579         .patch_cntl = gfx_v9_0_ring_patch_cntl,
7580         .patch_de = gfx_v9_0_ring_patch_de_meta,
7581         .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7582         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7583         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7584         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7585 };
7586
7587 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7588         .type = AMDGPU_RING_TYPE_COMPUTE,
7589         .align_mask = 0xff,
7590         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7591         .support_64bit_ptrs = true,
7592         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7593         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7594         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7595         .emit_frame_size =
7596                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7597                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7598                 5 + /* hdp invalidate */
7599                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7600                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7601                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7602                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7603                 7 + /* gfx_v9_0_emit_mem_sync */
7604                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7605                 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7606                 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7607         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7608         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7609         .emit_fence = gfx_v9_0_ring_emit_fence,
7610         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7611         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7612         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7613         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7614         .test_ring = gfx_v9_0_ring_test_ring,
7615         .test_ib = gfx_v9_0_ring_test_ib,
7616         .insert_nop = gfx_v9_ring_insert_nop,
7617         .pad_ib = amdgpu_ring_generic_pad_ib,
7618         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7619         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7620         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7621         .soft_recovery = gfx_v9_0_ring_soft_recovery,
7622         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7623         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7624         .reset = gfx_v9_0_reset_kcq,
7625         .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7626         .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7627         .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7628 };
7629
7630 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7631         .type = AMDGPU_RING_TYPE_KIQ,
7632         .align_mask = 0xff,
7633         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7634         .support_64bit_ptrs = true,
7635         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7636         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7637         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7638         .emit_frame_size =
7639                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7640                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7641                 5 + /* hdp invalidate */
7642                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7643                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7644                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7645                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7646         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7647         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7648         .test_ring = gfx_v9_0_ring_test_ring,
7649         .insert_nop = amdgpu_ring_insert_nop,
7650         .pad_ib = amdgpu_ring_generic_pad_ib,
7651         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7652         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7653         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7654         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7655 };
7656
7657 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7658 {
7659         int i;
7660
7661         adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7662
7663         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7664                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7665
7666         if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7667                 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7668                         adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7669         }
7670
7671         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7672                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7673 }
7674
7675 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7676         .set = gfx_v9_0_set_eop_interrupt_state,
7677         .process = gfx_v9_0_eop_irq,
7678 };
7679
7680 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7681         .set = gfx_v9_0_set_priv_reg_fault_state,
7682         .process = gfx_v9_0_priv_reg_irq,
7683 };
7684
7685 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7686         .set = gfx_v9_0_set_bad_op_fault_state,
7687         .process = gfx_v9_0_bad_op_irq,
7688 };
7689
7690 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7691         .set = gfx_v9_0_set_priv_inst_fault_state,
7692         .process = gfx_v9_0_priv_inst_irq,
7693 };
7694
7695 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7696         .set = gfx_v9_0_set_cp_ecc_error_state,
7697         .process = amdgpu_gfx_cp_ecc_error_irq,
7698 };
7699
7700
7701 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7702 {
7703         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7704         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7705
7706         adev->gfx.priv_reg_irq.num_types = 1;
7707         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7708
7709         adev->gfx.bad_op_irq.num_types = 1;
7710         adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7711
7712         adev->gfx.priv_inst_irq.num_types = 1;
7713         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7714
7715         adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7716         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7717 }
7718
7719 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7720 {
7721         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7722         case IP_VERSION(9, 0, 1):
7723         case IP_VERSION(9, 2, 1):
7724         case IP_VERSION(9, 4, 0):
7725         case IP_VERSION(9, 2, 2):
7726         case IP_VERSION(9, 1, 0):
7727         case IP_VERSION(9, 4, 1):
7728         case IP_VERSION(9, 3, 0):
7729         case IP_VERSION(9, 4, 2):
7730                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7731                 break;
7732         default:
7733                 break;
7734         }
7735 }
7736
7737 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7738 {
7739         /* init asic gds info */
7740         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7741         case IP_VERSION(9, 0, 1):
7742         case IP_VERSION(9, 2, 1):
7743         case IP_VERSION(9, 4, 0):
7744                 adev->gds.gds_size = 0x10000;
7745                 break;
7746         case IP_VERSION(9, 2, 2):
7747         case IP_VERSION(9, 1, 0):
7748         case IP_VERSION(9, 4, 1):
7749                 adev->gds.gds_size = 0x1000;
7750                 break;
7751         case IP_VERSION(9, 4, 2):
7752                 /* aldebaran removed all the GDS internal memory;
7753                  * only GWS opcodes such as barrier and semaphore
7754                  * are supported in the kernel */
7755                 adev->gds.gds_size = 0;
7756                 break;
7757         default:
7758                 adev->gds.gds_size = 0x10000;
7759                 break;
7760         }
7761
7762         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7763         case IP_VERSION(9, 0, 1):
7764         case IP_VERSION(9, 4, 0):
7765                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7766                 break;
7767         case IP_VERSION(9, 2, 1):
7768                 adev->gds.gds_compute_max_wave_id = 0x27f;
7769                 break;
7770         case IP_VERSION(9, 2, 2):
7771         case IP_VERSION(9, 1, 0):
7772                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7773                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7774                 else
7775                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7776                 break;
7777         case IP_VERSION(9, 4, 1):
7778                 adev->gds.gds_compute_max_wave_id = 0xfff;
7779                 break;
7780         case IP_VERSION(9, 4, 2):
7781                 /* deprecated for Aldebaran, no usage at all */
7782                 adev->gds.gds_compute_max_wave_id = 0;
7783                 break;
7784         default:
7785                 /* this really depends on the chip */
7786                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7787                 break;
7788         }
7789
7790         adev->gds.gws_size = 64;
7791         adev->gds.oa_size = 16;
7792 }
7793
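/*
 * Apply a user-requested CU disable bitmap to the currently selected
 * SE/SH via mmGC_USER_SHADER_ARRAY_CONFIG.
 */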
7794 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7795                                                  u32 bitmap)
7796 {
7797         u32 data;
7798
7799         if (!bitmap)
7800                 return;
7801
7802         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7803         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7804
7805         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7806 }
7807
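/*
 * Return the bitmap of active CUs for the currently selected SE/SH,
 * derived from the hardware and user shader array config registers.
 */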
7808 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7809 {
7810         u32 data, mask;
7811
7812         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7813         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7814
7815         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7816         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7817
7818         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7819
7820         return (~data) & mask;
7821 }
7822
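/*
 * Fill cu_info with the per-SE/SH active CU bitmaps, the always-on CU
 * mask, the total number of active CUs and the SIMD count per CU,
 * honouring any user CU disable masks.
 */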
7823 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7824                                  struct amdgpu_cu_info *cu_info)
7825 {
7826         int i, j, k, counter, active_cu_number = 0;
7827         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7828         unsigned disable_masks[4 * 4];
7829
7830         if (!adev || !cu_info)
7831                 return -EINVAL;
7832
7833         /*
7834          * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7835          */
7836         if (adev->gfx.config.max_shader_engines *
7837                 adev->gfx.config.max_sh_per_se > 16)
7838                 return -EINVAL;
7839
7840         amdgpu_gfx_parse_disable_cu(disable_masks,
7841                                     adev->gfx.config.max_shader_engines,
7842                                     adev->gfx.config.max_sh_per_se);
7843
7844         mutex_lock(&adev->grbm_idx_mutex);
7845         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7846                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7847                         mask = 1;
7848                         ao_bitmap = 0;
7849                         counter = 0;
7850                         amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7851                         gfx_v9_0_set_user_cu_inactive_bitmap(
7852                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7853                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7854
7855                         /*
7856                          * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7857                          * a 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
7858                          * layout.
7859                          * For Arcturus, however, the SE/SH layout changed to 8*1.
7860                          * To minimize the impact, we keep it compatible with the
7861                          * current bitmap array as below:
7862                          *    SE4,SH0 --> bitmap[0][1]
7863                          *    SE5,SH0 --> bitmap[1][1]
7864                          *    SE6,SH0 --> bitmap[2][1]
7865                          *    SE7,SH0 --> bitmap[3][1]
7866                          */
7867                         cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7868
7869                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7870                                 if (bitmap & mask) {
7871                                         if (counter < adev->gfx.config.max_cu_per_sh)
7872                                                 ao_bitmap |= mask;
7873                                         counter++;
7874                                 }
7875                                 mask <<= 1;
7876                         }
7877                         active_cu_number += counter;
7878                         if (i < 2 && j < 2)
7879                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7880                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7881                 }
7882         }
7883         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7884         mutex_unlock(&adev->grbm_idx_mutex);
7885
7886         cu_info->number = active_cu_number;
7887         cu_info->ao_cu_mask = ao_cu_mask;
7888         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7889
7890         return 0;
7891 }
7892
7893 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7894 {
7895         .type = AMD_IP_BLOCK_TYPE_GFX,
7896         .major = 9,
7897         .minor = 0,
7898         .rev = 0,
7899         .funcs = &gfx_v9_0_ip_funcs,
7900 };