/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_1_ARCT                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_2_ARCT                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_3_ARCT                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_4_ARCT                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_5_ARCT                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

enum ta_ras_gfx_subblock {
        /* CPC */
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

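/*
 * Build one ras_gfx_subblocks[] entry per GFX sub-block.  Arguments
 * a..d and e..h pack the hardware- and software-supported error types
 * into bitmasks, presumably matching the AMDGPU_RAS_ERROR__* flags in
 * amdgpu_ras.h: bit 0 parity, bit 1 single-correctable, bit 2
 * multi-uncorrectable, bit 3 poison.
 */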
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

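/*
 * "Golden" register settings are tuned values programmed once at hw
 * init.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask)
 * entry names a register, a mask selecting the field to update and the
 * value to OR in; soc15_program_register_sequence() does the
 * read-modify-write (or a direct write when the mask is 0xffffffff).
 */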
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                           void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

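/*
 * KIQ (Kernel Interface Queue) helpers: these build PM4 packets on the
 * KIQ ring so the CP firmware sets up and manages compute queues on
 * the driver's behalf.  SET_RESOURCES tells the firmware which queues
 * the kernel owns (the GWS/OA/GDS resources are left unused here).
 */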
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

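/*
 * Ask the CP firmware to map (activate) a compute ring: the packet
 * carries the queue/pipe/ME selection, the doorbell offset and the GPU
 * addresses of the ring's MQD and write pointer.
 */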
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

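/*
 * Unmap (deactivate) a compute ring.  For PREEMPT_QUEUES_NO_UNMAP the
 * trailing dwords carry an address/sequence pair that the firmware is
 * expected to signal once preemption completes; otherwise they are
 * unused.
 */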
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

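/*
 * Query a ring's status through the KIQ; the CP is expected to
 * acknowledge by writing @seq to the fence address @addr.
 */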
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        amdgpu_ring_write(kiq_ring, /* doorbell offset, engine select */
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

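/*
 * Flush GPU TLB entries for @pasid via a KIQ packet rather than MMIO;
 * typically used when direct register access is not available (e.g.
 * under SR-IOV).
 */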
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

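/*
 * The *_size fields below are packet sizes in dwords and must match
 * the number of amdgpu_ring_write() calls each helper above emits.
 */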
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

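/*
 * Apply the per-ASIC golden settings above, keyed on the GC IP
 * version.  Renoir returns before the common 9.x sequence, and
 * Arcturus/Aldebaran skip it as well.
 */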
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case IP_VERSION(9, 2, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case IP_VERSION(9, 4, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case IP_VERSION(9, 4, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case IP_VERSION(9, 2, 2):
        case IP_VERSION(9, 1, 0):
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case IP_VERSION(9, 3, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        case IP_VERSION(9, 4, 2):
                gfx_v9_4_2_init_golden_registers(adev,
                                                 adev->smuio.funcs->get_die_id(adev));
                break;
        default:
                break;
        }

        if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
            (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                ARRAY_SIZE(golden_settings_gc_9_x_common));
}

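/*
 * Reserve eight SCRATCH_REG* registers for ring tests; free_mask
 * tracks which of them are currently available for allocation.
 */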
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

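/*
 * Emit a PM4 WRITE_DATA packet that writes @val to register @reg,
 * optionally requesting write confirmation (@wc).
 */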
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

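/*
 * Emit a PM4 WAIT_REG_MEM packet: poll a register (mem_space = 0) or a
 * memory location (mem_space = 1) until (value & @mask) == @ref,
 * rechecking every @inv cycles.  Memory addresses must be
 * dword-aligned.
 */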
973 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
974                                   int mem_space, int opt, uint32_t addr0,
975                                   uint32_t addr1, uint32_t ref, uint32_t mask,
976                                   uint32_t inv)
977 {
978         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
979         amdgpu_ring_write(ring,
980                                  /* memory (1) or register (0) */
981                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
982                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
983                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
984                                  WAIT_REG_MEM_ENGINE(eng_sel)));
985
986         if (mem_space)
987                 BUG_ON(addr0 & 0x3); /* Dword align */
988         amdgpu_ring_write(ring, addr0);
989         amdgpu_ring_write(ring, addr1);
990         amdgpu_ring_write(ring, ref);
991         amdgpu_ring_write(ring, mask);
992         amdgpu_ring_write(ring, inv); /* poll interval */
993 }
994
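/*
 * Minimal ring liveness test: seed a scratch register with 0xCAFEDEAD,
 * submit a SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, then
 * poll until the new value appears or adev->usec_timeout microseconds
 * have passed.
 */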
995 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
996 {
997         struct amdgpu_device *adev = ring->adev;
998         uint32_t scratch;
999         uint32_t tmp = 0;
1000         unsigned i;
1001         int r;
1002
1003         r = amdgpu_gfx_scratch_get(adev, &scratch);
1004         if (r)
1005                 return r;
1006
1007         WREG32(scratch, 0xCAFEDEAD);
1008         r = amdgpu_ring_alloc(ring, 3);
1009         if (r)
1010                 goto error_free_scratch;
1011
1012         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1013         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1014         amdgpu_ring_write(ring, 0xDEADBEEF);
1015         amdgpu_ring_commit(ring);
1016
1017         for (i = 0; i < adev->usec_timeout; i++) {
1018                 tmp = RREG32(scratch);
1019                 if (tmp == 0xDEADBEEF)
1020                         break;
1021                 udelay(1);
1022         }
1023
1024         if (i >= adev->usec_timeout)
1025                 r = -ETIMEDOUT;
1026
1027 error_free_scratch:
1028         amdgpu_gfx_scratch_free(adev, scratch);
1029         return r;
1030 }
1031
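/*
 * IB liveness test: build a five-dword IB whose WRITE_DATA packet stores
 * 0xDEADBEEF into a write-back slot in system memory, schedule it, wait
 * on the resulting fence and check that the value arrived.
 */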
1032 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1033 {
1034         struct amdgpu_device *adev = ring->adev;
1035         struct amdgpu_ib ib;
1036         struct dma_fence *f = NULL;
1037
1038         unsigned index;
1039         uint64_t gpu_addr;
1040         uint32_t tmp;
1041         long r;
1042
1043         r = amdgpu_device_wb_get(adev, &index);
1044         if (r)
1045                 return r;
1046
1047         gpu_addr = adev->wb.gpu_addr + (index * 4);
1048         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1049         memset(&ib, 0, sizeof(ib));
1050         r = amdgpu_ib_get(adev, NULL, 16,
1051                           AMDGPU_IB_POOL_DIRECT, &ib);
1052         if (r)
1053                 goto err1;
1054
1055         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1056         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1057         ib.ptr[2] = lower_32_bits(gpu_addr);
1058         ib.ptr[3] = upper_32_bits(gpu_addr);
1059         ib.ptr[4] = 0xDEADBEEF;
1060         ib.length_dw = 5;
1061
1062         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1063         if (r)
1064                 goto err2;
1065
1066         r = dma_fence_wait_timeout(f, false, timeout);
1067         if (r == 0) {
1068                 r = -ETIMEDOUT;
1069                 goto err2;
1070         } else if (r < 0) {
1071                 goto err2;
1072         }
1073
1074         tmp = adev->wb.wb[index];
1075         if (tmp == 0xDEADBEEF)
1076                 r = 0;
1077         else
1078                 r = -EINVAL;
1079
1080 err2:
1081         amdgpu_ib_free(adev, &ib, NULL);
1082         dma_fence_put(f);
1083 err1:
1084         amdgpu_device_wb_free(adev, index);
1085         return r;
1086 }
1087
1089 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1090 {
1091         release_firmware(adev->gfx.pfp_fw);
1092         adev->gfx.pfp_fw = NULL;
1093         release_firmware(adev->gfx.me_fw);
1094         adev->gfx.me_fw = NULL;
1095         release_firmware(adev->gfx.ce_fw);
1096         adev->gfx.ce_fw = NULL;
1097         release_firmware(adev->gfx.rlc_fw);
1098         adev->gfx.rlc_fw = NULL;
1099         release_firmware(adev->gfx.mec_fw);
1100         adev->gfx.mec_fw = NULL;
1101         release_firmware(adev->gfx.mec2_fw);
1102         adev->gfx.mec2_fw = NULL;
1103
1104         kfree(adev->gfx.rlc.register_list_format);
1105 }
1106
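/*
 * Parse the v2.1 RLC firmware header: record the versions, sizes and
 * in-image offsets of the three save/restore lists (CNTL, GPM and SRM)
 * so they can be registered for PSP loading later in
 * gfx_v9_0_init_rlc_microcode().
 */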
1107 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1108 {
1109         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1110
1111         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1112         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1113         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1114         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1115         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1116         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1117         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1118         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1119         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1120         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1121         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1122         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1123         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1124         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1125                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1126 }
1127
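/*
 * Per-chip CP firmware version checks; the fw_write_wait flags set here
 * presumably let the rest of the driver use a combined write-then-wait
 * packet instead of separate WRITE_DATA and WAIT_REG_MEM packets once
 * the microcode is new enough.
 */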
1128 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1129 {
1130         adev->gfx.me_fw_write_wait = false;
1131         adev->gfx.mec_fw_write_wait = false;
1132
1133         if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1134             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1135             (adev->gfx.mec_feature_version < 46) ||
1136             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1137             (adev->gfx.pfp_feature_version < 46)))
1138                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1139
1140         switch (adev->ip_versions[GC_HWIP][0]) {
1141         case IP_VERSION(9, 0, 1):
1142                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1143                     (adev->gfx.me_feature_version >= 42) &&
1144                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1145                     (adev->gfx.pfp_feature_version >= 42))
1146                         adev->gfx.me_fw_write_wait = true;
1147
1148                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1149                     (adev->gfx.mec_feature_version >= 42))
1150                         adev->gfx.mec_fw_write_wait = true;
1151                 break;
1152         case IP_VERSION(9, 2, 1):
1153                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1154                     (adev->gfx.me_feature_version >= 44) &&
1155                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1156                     (adev->gfx.pfp_feature_version >= 44))
1157                         adev->gfx.me_fw_write_wait = true;
1158
1159                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1160                     (adev->gfx.mec_feature_version >= 44))
1161                         adev->gfx.mec_fw_write_wait = true;
1162                 break;
1163         case IP_VERSION(9, 4, 0):
1164                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1165                     (adev->gfx.me_feature_version >= 44) &&
1166                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1167                     (adev->gfx.pfp_feature_version >= 44))
1168                         adev->gfx.me_fw_write_wait = true;
1169
1170                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1171                     (adev->gfx.mec_feature_version >= 44))
1172                         adev->gfx.mec_fw_write_wait = true;
1173                 break;
1174         case IP_VERSION(9, 1, 0):
1175         case IP_VERSION(9, 2, 2):
1176                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1177                     (adev->gfx.me_feature_version >= 42) &&
1178                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1179                     (adev->gfx.pfp_feature_version >= 42))
1180                         adev->gfx.me_fw_write_wait = true;
1181
1182                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1183                     (adev->gfx.mec_feature_version >= 42))
1184                         adev->gfx.mec_fw_write_wait = true;
1185                 break;
1186         default:
1187                 adev->gfx.me_fw_write_wait = true;
1188                 adev->gfx.mec_fw_write_wait = true;
1189                 break;
1190         }
1191 }
1192
1193 struct amdgpu_gfxoff_quirk {
1194         u16 chip_vendor;
1195         u16 chip_device;
1196         u16 subsys_vendor;
1197         u16 subsys_device;
1198         u8 revision;
1199 };
1200
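/*
 * Boards on which GFXOFF is known to misbehave, matched on PCI vendor,
 * device, subsystem IDs and revision; the table is zero-terminated.  See
 * the bugzilla links below for the reports.
 */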
1201 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1202         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1203         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1204         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1205         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1206         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1207         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1208         { 0, 0, 0, 0, 0 },
1209 };
1210
1211 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1212 {
1213         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1214
1215         while (p && p->chip_device != 0) {
1216                 if (pdev->vendor == p->chip_vendor &&
1217                     pdev->device == p->chip_device &&
1218                     pdev->subsystem_vendor == p->subsys_vendor &&
1219                     pdev->subsystem_device == p->subsys_device &&
1220                     pdev->revision == p->revision) {
1221                         return true;
1222                 }
1223                 ++p;
1224         }
1225         return false;
1226 }
1227
1228 static bool is_raven_kicker(struct amdgpu_device *adev)
1229 {
1230         if (adev->pm.fw_version >= 0x41e2b)
1231                 return true;
1232         else
1233                 return false;
1234 }
1235
1236 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1237 {
1238         if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1239             (adev->gfx.me_fw_version >= 0x000000a5) &&
1240             (adev->gfx.me_feature_version >= 52))
1241                 return true;
1242         else
1243                 return false;
1244 }
1245
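/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware
 * predates the "kicker" release; where GFXOFF survives on an APU, also
 * enable the GFX powergating flags that accompany it.
 */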
1246 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1247 {
1248         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1249                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1250
1251         switch (adev->ip_versions[GC_HWIP][0]) {
1252         case IP_VERSION(9, 0, 1):
1253         case IP_VERSION(9, 2, 1):
1254         case IP_VERSION(9, 4, 0):
1255                 break;
1256         case IP_VERSION(9, 2, 2):
1257         case IP_VERSION(9, 1, 0):
1258                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1259                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1260                     ((!is_raven_kicker(adev) &&
1261                       adev->gfx.rlc_fw_version < 531) ||
1262                      (adev->gfx.rlc_feature_version < 1) ||
1263                      !adev->gfx.rlc.is_rlc_v2_1))
1264                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1265
1266                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1267                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1268                                 AMD_PG_SUPPORT_CP |
1269                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1270                 break;
1271         case IP_VERSION(9, 3, 0):
1272                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1273                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1274                                 AMD_PG_SUPPORT_CP |
1275                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1276                 break;
1277         default:
1278                 break;
1279         }
1280 }
1281
1282 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1283                                           const char *chip_name)
1284 {
1285         char fw_name[30];
1286         int err;
1287         struct amdgpu_firmware_info *info = NULL;
1288         const struct common_firmware_header *header = NULL;
1289         const struct gfx_firmware_header_v1_0 *cp_hdr;
1290
1291         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1292         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1293         if (err)
1294                 goto out;
1295         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1296         if (err)
1297                 goto out;
1298         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1299         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1300         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1301
1302         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1303         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1304         if (err)
1305                 goto out;
1306         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1307         if (err)
1308                 goto out;
1309         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1310         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1311         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1312
1313         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1314         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1315         if (err)
1316                 goto out;
1317         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1318         if (err)
1319                 goto out;
1320         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1322         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1323
1324         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1325                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1326                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1327                 info->fw = adev->gfx.pfp_fw;
1328                 header = (const struct common_firmware_header *)info->fw->data;
1329                 adev->firmware.fw_size +=
1330                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1331
1332                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1333                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1334                 info->fw = adev->gfx.me_fw;
1335                 header = (const struct common_firmware_header *)info->fw->data;
1336                 adev->firmware.fw_size +=
1337                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1338
1339                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1340                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1341                 info->fw = adev->gfx.ce_fw;
1342                 header = (const struct common_firmware_header *)info->fw->data;
1343                 adev->firmware.fw_size +=
1344                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1345         }
1346
1347 out:
1348         if (err) {
1349                 dev_err(adev->dev,
1350                         "gfx9: Failed to load firmware \"%s\"\n",
1351                         fw_name);
1352                 release_firmware(adev->gfx.pfp_fw);
1353                 adev->gfx.pfp_fw = NULL;
1354                 release_firmware(adev->gfx.me_fw);
1355                 adev->gfx.me_fw = NULL;
1356                 release_firmware(adev->gfx.ce_fw);
1357                 adev->gfx.ce_fw = NULL;
1358         }
1359         return err;
1360 }
1361
1362 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1363                                           const char *chip_name)
1364 {
1365         char fw_name[30];
1366         int err;
1367         struct amdgpu_firmware_info *info = NULL;
1368         const struct common_firmware_header *header = NULL;
1369         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1370         unsigned int *tmp = NULL;
1371         unsigned int i = 0;
1372         uint16_t version_major;
1373         uint16_t version_minor;
1374         uint32_t smu_version;
1375
1376         /*
1377          * For Picasso on an AM4-socket board, use picasso_rlc_am4.bin
1378          * instead of picasso_rlc.bin.  Detection: PCO AM4 parts have
1379          * revision 0xC8-0xCF or 0xD8-0xDF; anything else is PCO FP5.
1380          */
1384         if (!strcmp(chip_name, "picasso") &&
1385                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1386                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1387                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1388         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1389                 (smu_version >= 0x41e2b))
1390                 /*
1391                  * SMC is loaded by the SBIOS on APUs, so the SMU
1392                  * version can be queried directly.
1393                  */
1393                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1394         else
1395                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1396         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1397         if (err)
1398                 goto out;
1399         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1400         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1401
1402         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1403         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1404         if (version_major == 2 && version_minor == 1)
1405                 adev->gfx.rlc.is_rlc_v2_1 = true;
1406
1407         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1408         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1409         adev->gfx.rlc.save_and_restore_offset =
1410                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1411         adev->gfx.rlc.clear_state_descriptor_offset =
1412                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1413         adev->gfx.rlc.avail_scratch_ram_locations =
1414                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1415         adev->gfx.rlc.reg_restore_list_size =
1416                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1417         adev->gfx.rlc.reg_list_format_start =
1418                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1419         adev->gfx.rlc.reg_list_format_separate_start =
1420                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1421         adev->gfx.rlc.starting_offsets_start =
1422                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1423         adev->gfx.rlc.reg_list_format_size_bytes =
1424                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1425         adev->gfx.rlc.reg_list_size_bytes =
1426                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1427         adev->gfx.rlc.register_list_format =
1428                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1429                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1430         if (!adev->gfx.rlc.register_list_format) {
1431                 err = -ENOMEM;
1432                 goto out;
1433         }
1434
1435         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1436                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1437         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1438                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1439
1440         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1441
1442         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1443                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1444         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1445                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1446
1447         if (adev->gfx.rlc.is_rlc_v2_1)
1448                 gfx_v9_0_init_rlc_ext_microcode(adev);
1449
1450         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1451                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1452                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1453                 info->fw = adev->gfx.rlc_fw;
1454                 header = (const struct common_firmware_header *)info->fw->data;
1455                 adev->firmware.fw_size +=
1456                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1457
1458                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1459                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1460                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1461                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1462                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1463                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1464                         info->fw = adev->gfx.rlc_fw;
1465                         adev->firmware.fw_size +=
1466                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1467
1468                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1469                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1470                         info->fw = adev->gfx.rlc_fw;
1471                         adev->firmware.fw_size +=
1472                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1473
1474                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1475                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1476                         info->fw = adev->gfx.rlc_fw;
1477                         adev->firmware.fw_size +=
1478                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1479                 }
1480         }
1481
1482 out:
1483         if (err) {
1484                 dev_err(adev->dev,
1485                         "gfx9: Failed to load firmware \"%s\"\n",
1486                         fw_name);
1487                 release_firmware(adev->gfx.rlc_fw);
1488                 adev->gfx.rlc_fw = NULL;
1489         }
1490         return err;
1491 }
1492
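/*
 * Renoir/Green Sardine (9, 3, 0), Arcturus (9, 4, 1) and Aldebaran
 * (9, 4, 2) do not ship a separate mec2 binary; for those parts MEC2
 * simply reuses the MEC1 image and version numbers (see the caller).
 */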
1493 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1494 {
1495         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1496             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1497             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1498                 return false;
1499
1500         return true;
1501 }
1502
1503 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1504                                           const char *chip_name)
1505 {
1506         char fw_name[30];
1507         int err;
1508         struct amdgpu_firmware_info *info = NULL;
1509         const struct common_firmware_header *header = NULL;
1510         const struct gfx_firmware_header_v1_0 *cp_hdr;
1511
1512         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1513         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1514         if (err)
1515                 goto out;
1516         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1517         if (err)
1518                 goto out;
1519         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1520         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1521         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1522
1524         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1525                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1526                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1527                 if (!err) {
1528                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1529                         if (err)
1530                                 goto out;
1531                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1532                                 adev->gfx.mec2_fw->data;
1533                         adev->gfx.mec2_fw_version =
1534                                 le32_to_cpu(cp_hdr->header.ucode_version);
1535                         adev->gfx.mec2_feature_version =
1536                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1537                 } else {
1538                         err = 0;
1539                         adev->gfx.mec2_fw = NULL;
1540                 }
1541         } else {
1542                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1543                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1544         }
1545
1546         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1547                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1548                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1549                 info->fw = adev->gfx.mec_fw;
1550                 header = (const struct common_firmware_header *)info->fw->data;
1551                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1552                 adev->firmware.fw_size +=
1553                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1554
1555                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1556                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1557                 info->fw = adev->gfx.mec_fw;
1558                 adev->firmware.fw_size +=
1559                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1560
1561                 if (adev->gfx.mec2_fw) {
1562                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1563                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1564                         info->fw = adev->gfx.mec2_fw;
1565                         header = (const struct common_firmware_header *)info->fw->data;
1566                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1567                         adev->firmware.fw_size +=
1568                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1569
1570                         /* TODO: Determine if MEC2 JT FW loading can be
1571                          * removed for all GFX v9 and newer ASICs.
1572                          */
1572                         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1573                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1574                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1575                                 info->fw = adev->gfx.mec2_fw;
1576                                 adev->firmware.fw_size +=
1577                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1578                                         PAGE_SIZE);
1579                         }
1580                 }
1581         }
1582
1583 out:
1584         gfx_v9_0_check_if_need_gfxoff(adev);
1585         gfx_v9_0_check_fw_write_wait(adev);
1586         if (err) {
1587                 dev_err(adev->dev,
1588                         "gfx9: Failed to load firmware \"%s\"\n",
1589                         fw_name);
1590                 release_firmware(adev->gfx.mec_fw);
1591                 adev->gfx.mec_fw = NULL;
1592                 release_firmware(adev->gfx.mec2_fw);
1593                 adev->gfx.mec2_fw = NULL;
1594         }
1595         return err;
1596 }
1597
1598 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1599 {
1600         const char *chip_name;
1601         int r;
1602
1603         DRM_DEBUG("\n");
1604
1605         switch (adev->ip_versions[GC_HWIP][0]) {
1606         case IP_VERSION(9, 0, 1):
1607                 chip_name = "vega10";
1608                 break;
1609         case IP_VERSION(9, 2, 1):
1610                 chip_name = "vega12";
1611                 break;
1612         case IP_VERSION(9, 4, 0):
1613                 chip_name = "vega20";
1614                 break;
1615         case IP_VERSION(9, 2, 2):
1616         case IP_VERSION(9, 1, 0):
1617                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1618                         chip_name = "raven2";
1619                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1620                         chip_name = "picasso";
1621                 else
1622                         chip_name = "raven";
1623                 break;
1624         case IP_VERSION(9, 4, 1):
1625                 chip_name = "arcturus";
1626                 break;
1627         case IP_VERSION(9, 3, 0):
1628                 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1629                         chip_name = "renoir";
1630                 else
1631                         chip_name = "green_sardine";
1632                 break;
1633         case IP_VERSION(9, 4, 2):
1634                 chip_name = "aldebaran";
1635                 break;
1636         default:
1637                 BUG();
1638         }
1639
1640         /* No CPG in Arcturus */
1641         if (adev->gfx.num_gfx_rings) {
1642                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1643                 if (r)
1644                         return r;
1645         }
1646
1647         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1648         if (r)
1649                 return r;
1650
1651         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1652         if (r)
1653                 return r;
1654
1655         return 0;
1656 }
1657
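/*
 * Dword count of the clear-state buffer assembled in
 * gfx_v9_0_get_csb_buffer() below: preamble begin/end (2 + 2 dwords),
 * CONTEXT_CONTROL (3), one SET_CONTEXT_REG run per extent
 * (2 + reg_count) and the trailing CLEAR_STATE packet (2).
 */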
1658 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1659 {
1660         u32 count = 0;
1661         const struct cs_section_def *sect = NULL;
1662         const struct cs_extent_def *ext = NULL;
1663
1664         /* begin clear state */
1665         count += 2;
1666         /* context control state */
1667         count += 3;
1668
1669         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1670                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1671                         if (sect->id == SECT_CONTEXT)
1672                                 count += 2 + ext->reg_count;
1673                         else
1674                                 return 0;
1675                 }
1676         }
1677
1678         /* end clear state */
1679         count += 2;
1680         /* clear state */
1681         count += 2;
1682
1683         return count;
1684 }
1685
1686 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1687                                     volatile u32 *buffer)
1688 {
1689         u32 count = 0, i;
1690         const struct cs_section_def *sect = NULL;
1691         const struct cs_extent_def *ext = NULL;
1692
1693         if (adev->gfx.rlc.cs_data == NULL)
1694                 return;
1695         if (buffer == NULL)
1696                 return;
1697
1698         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1699         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1700
1701         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1702         buffer[count++] = cpu_to_le32(0x80000000);
1703         buffer[count++] = cpu_to_le32(0x80000000);
1704
1705         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1706                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1707                         if (sect->id == SECT_CONTEXT) {
1708                                 buffer[count++] =
1709                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1710                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1711                                                 PACKET3_SET_CONTEXT_REG_START);
1712                                 for (i = 0; i < ext->reg_count; i++)
1713                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1714                         } else {
1715                                 return;
1716                         }
1717                 }
1718         }
1719
1720         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1721         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1722
1723         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1724         buffer[count++] = cpu_to_le32(0);
1725 }
1726
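/*
 * Build, per SE/SH, the bitmap of CUs that must stay powered: the first
 * pg_always_on_cu_num CUs go into RLC_PG_ALWAYS_ON_CU_MASK and the first
 * always_on_cu_num (4 on APUs, 8 on Vega12, 12 otherwise) into
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK; the result is cached in ao_cu_bitmap.
 */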
1727 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1728 {
1729         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1730         uint32_t pg_always_on_cu_num = 2;
1731         uint32_t always_on_cu_num;
1732         uint32_t i, j, k;
1733         uint32_t mask, cu_bitmap, counter;
1734
1735         if (adev->flags & AMD_IS_APU)
1736                 always_on_cu_num = 4;
1737         else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1738                 always_on_cu_num = 8;
1739         else
1740                 always_on_cu_num = 12;
1741
1742         mutex_lock(&adev->grbm_idx_mutex);
1743         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1744                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1745                         mask = 1;
1746                         cu_bitmap = 0;
1747                         counter = 0;
1748                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1749
1750                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1751                                 if (cu_info->bitmap[i][j] & mask) {
1752                                         if (counter == pg_always_on_cu_num)
1753                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1754                                         if (counter < always_on_cu_num)
1755                                                 cu_bitmap |= mask;
1756                                         else
1757                                                 break;
1758                                         counter++;
1759                                 }
1760                                 mask <<= 1;
1761                         }
1762
1763                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1764                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1765                 }
1766         }
1767         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1768         mutex_unlock(&adev->grbm_idx_mutex);
1769 }
1770
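/*
 * Static LBPW (load balancing per watt) programming; the magic numbers
 * are hardware tuning values taken as-is.  gfx_v9_4_init_lbpw() below is
 * the same sequence with different thresholds.
 */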
1771 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1772 {
1773         uint32_t data;
1774
1775         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1778         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1779         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1780
1781         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1782         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1783
1784         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1785         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1786
1787         mutex_lock(&adev->grbm_idx_mutex);
1788         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1789         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1790         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1791
1792         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1793         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1794         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1795         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1796         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1797
1798         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1799         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1800         data &= 0x0000FFFF;
1801         data |= 0x00C00000;
1802         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1803
1804         /*
1805          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1806          * programmed in gfx_v9_0_init_always_on_cu_mask()
1807          */
1808
1809         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
1810          * used here as part of the RLC_LB_CNTL configuration */
1811         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1812         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1813         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1814         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1815         mutex_unlock(&adev->grbm_idx_mutex);
1816
1817         gfx_v9_0_init_always_on_cu_mask(adev);
1818 }
1819
1820 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1821 {
1822         uint32_t data;
1823
1824         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1827         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1828         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1829
1830         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1831         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1832
1833         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1834         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1835
1836         mutex_lock(&adev->grbm_idx_mutex);
1837         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1838         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1839         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1840
1841         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1842         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1843         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1844         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1845         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1846
1847         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1848         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1849         data &= 0x0000FFFF;
1850         data |= 0x00C00000;
1851         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1852
1853         /*
1854          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1855          * programmed in gfx_v9_0_init_always_on_cu_mask()
1856          */
1857
1858         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
1859          * used here as part of the RLC_LB_CNTL configuration */
1860         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1861         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1862         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1863         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1864         mutex_unlock(&adev->grbm_idx_mutex);
1865
1866         gfx_v9_0_init_always_on_cu_mask(adev);
1867 }
1868
1869 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1870 {
1871         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1872 }
1873
1874 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1875 {
1876         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1877                 return 5;
1878         else
1879                 return 4;
1880 }
1881
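/*
 * Record the register offsets used by the RLC-gated (RLCG) register
 * access path, which the driver goes through when it cannot write
 * GRBM-protected registers directly (e.g. under SR-IOV); the scratch
 * registers carry the data and handshake.
 */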
1882 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1883 {
1884         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1885
1886         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1887         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1888         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1889         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1890         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1891         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1892         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1893         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1894         adev->gfx.rlc.rlcg_reg_access_supported = true;
1895 }
1896
1897 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1898 {
1899         const struct cs_section_def *cs_data;
1900         int r;
1901
1902         adev->gfx.rlc.cs_data = gfx9_cs_data;
1903
1904         cs_data = adev->gfx.rlc.cs_data;
1905
1906         if (cs_data) {
1907                 /* init clear state block */
1908                 r = amdgpu_gfx_rlc_init_csb(adev);
1909                 if (r)
1910                         return r;
1911         }
1912
1913         if (adev->flags & AMD_IS_APU) {
1914                 /* TODO: double check the cp_table_size for RV */
1915                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1916                 r = amdgpu_gfx_rlc_init_cpt(adev);
1917                 if (r)
1918                         return r;
1919         }
1920
1921         switch (adev->ip_versions[GC_HWIP][0]) {
1922         case IP_VERSION(9, 2, 2):
1923         case IP_VERSION(9, 1, 0):
1924                 gfx_v9_0_init_lbpw(adev);
1925                 break;
1926         case IP_VERSION(9, 4, 0):
1927                 gfx_v9_4_init_lbpw(adev);
1928                 break;
1929         default:
1930                 break;
1931         }
1932
1933         /* init spm vmid with 0xf */
1934         if (adev->gfx.rlc.funcs->update_spm_vmid)
1935                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1936
1937         return 0;
1938 }
1939
1940 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1941 {
1942         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1943         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1944 }
1945
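/*
 * Allocate the shared EOP buffer in VRAM (GFX9_MEC_HPD_SIZE bytes per
 * acquired compute ring) and stage a copy of the MEC microcode in a GTT
 * BO so it can be handed to the CP later.
 */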
1946 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1947 {
1948         int r;
1949         u32 *hpd;
1950         const __le32 *fw_data;
1951         unsigned fw_size;
1952         u32 *fw;
1953         size_t mec_hpd_size;
1954
1955         const struct gfx_firmware_header_v1_0 *mec_hdr;
1956
1957         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1958
1959         /* take ownership of the relevant compute queues */
1960         amdgpu_gfx_compute_queue_acquire(adev);
1961         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1962         if (mec_hpd_size) {
1963                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1964                                               AMDGPU_GEM_DOMAIN_VRAM,
1965                                               &adev->gfx.mec.hpd_eop_obj,
1966                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1967                                               (void **)&hpd);
1968                 if (r) {
1969                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1970                         gfx_v9_0_mec_fini(adev);
1971                         return r;
1972                 }
1973
1974                 memset(hpd, 0, mec_hpd_size);
1975
1976                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1977                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1978         }
1979
1980         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1981
1982         fw_data = (const __le32 *)
1983                 (adev->gfx.mec_fw->data +
1984                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1985         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1986
1987         r = amdgpu_bo_create_reserved(adev, fw_size,
1988                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1989                                       &adev->gfx.mec.mec_fw_obj,
1990                                       &adev->gfx.mec.mec_fw_gpu_addr,
1991                                       (void **)&fw);
1992         if (r) {
1993                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1994                 gfx_v9_0_mec_fini(adev);
1995                 return r;
1996         }
1997
1998         memcpy(fw, fw_data, fw_size);
1999
2000         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2001         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2002
2003         return 0;
2004 }
2005
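/*
 * Wave debug helpers: SQ exposes per-wave state through an indirect
 * index/data register pair.  FORCE_READ requests the read even for a
 * live wave, and AUTO_INCR (used below) steps the index after each read
 * so whole register ranges can be streamed out.
 */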
2006 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2007 {
2008         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2009                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2010                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2011                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2012                 (SQ_IND_INDEX__FORCE_READ_MASK));
2013         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2014 }
2015
2016 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2017                            uint32_t wave, uint32_t thread,
2018                            uint32_t regno, uint32_t num, uint32_t *out)
2019 {
2020         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2021                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2022                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2023                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2024                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2025                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2026                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2027         while (num--)
2028                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2029 }
2030
2031 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2032 {
2033         /* type 1 wave data */
2034         dst[(*no_fields)++] = 1;
2035         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2036         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2037         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2038         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2039         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2040         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2041         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2042         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2043         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2044         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2045         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2046         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2047         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2048         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2049         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2050 }
2051
2052 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2053                                      uint32_t wave, uint32_t start,
2054                                      uint32_t size, uint32_t *dst)
2055 {
2056         wave_read_regs(
2057                 adev, simd, wave, 0,
2058                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2059 }
2060
2061 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2062                                      uint32_t wave, uint32_t thread,
2063                                      uint32_t start, uint32_t size,
2064                                      uint32_t *dst)
2065 {
2066         wave_read_regs(
2067                 adev, simd, wave, thread,
2068                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2069 }
2070
2071 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2072                                   u32 me, u32 pipe, u32 q, u32 vm)
2073 {
2074         soc15_grbm_select(adev, me, pipe, q, vm);
2075 }
2076
2077 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2078         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2079         .select_se_sh = &gfx_v9_0_select_se_sh,
2080         .read_wave_data = &gfx_v9_0_read_wave_data,
2081         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2082         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2083         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2084 };
2085
2086 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
2087                 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2088                 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2089                 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2090 };
2091
2092 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2093         .ras_block = {
2094                 .hw_ops = &gfx_v9_0_ras_ops,
2095         },
2096 };
2097
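/*
 * Early per-chip configuration: pick the fifo sizes and the
 * GB_ADDR_CONFIG golden value for each GC IP revision, register the
 * matching RAS block where one exists, then decode GB_ADDR_CONFIG into
 * its individual fields (pipes, banks, RBs, SEs, interleave size).
 */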
2098 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2099 {
2100         u32 gb_addr_config;
2101         int err;
2102
2103         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2104
2105         switch (adev->ip_versions[GC_HWIP][0]) {
2106         case IP_VERSION(9, 0, 1):
2107                 adev->gfx.config.max_hw_contexts = 8;
2108                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2109                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2110                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2111                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2112                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2113                 break;
2114         case IP_VERSION(9, 2, 1):
2115                 adev->gfx.config.max_hw_contexts = 8;
2116                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2117                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2118                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2119                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2120                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2121                 DRM_INFO("fix gfx.config for vega12\n");
2122                 break;
2123         case IP_VERSION(9, 4, 0):
2124                 adev->gfx.ras = &gfx_v9_0_ras;
2125                 adev->gfx.config.max_hw_contexts = 8;
2126                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2127                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2128                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2129                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2130                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2131                 gb_addr_config &= ~0xf3e777ff;
2132                 gb_addr_config |= 0x22014042;
2133                 /* check vbios table if gpu info is not available */
2134                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2135                 if (err)
2136                         return err;
2137                 break;
2138         case IP_VERSION(9, 2, 2):
2139         case IP_VERSION(9, 1, 0):
2140                 adev->gfx.config.max_hw_contexts = 8;
2141                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2144                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2145                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2146                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2147                 else
2148                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2149                 break;
2150         case IP_VERSION(9, 4, 1):
2151                 adev->gfx.ras = &gfx_v9_4_ras;
2152                 adev->gfx.config.max_hw_contexts = 8;
2153                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2154                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2155                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2156                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2157                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2158                 gb_addr_config &= ~0xf3e777ff;
2159                 gb_addr_config |= 0x22014042;
2160                 break;
2161         case IP_VERSION(9, 3, 0):
2162                 adev->gfx.config.max_hw_contexts = 8;
2163                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2164                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2165                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2166                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2167                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2168                 gb_addr_config &= ~0xf3e777ff;
2169                 gb_addr_config |= 0x22010042;
2170                 break;
2171         case IP_VERSION(9, 4, 2):
2172                 adev->gfx.ras = &gfx_v9_4_2_ras;
2173                 adev->gfx.config.max_hw_contexts = 8;
2174                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2175                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2176                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2177                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2178                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2179                 gb_addr_config &= ~0xf3e777ff;
2180                 gb_addr_config |= 0x22014042;
2181                 /* check vbios table if gpu info is not available */
2182                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2183                 if (err)
2184                         return err;
2185                 break;
2186         default:
2187                 BUG();
2188                 break;
2189         }
2190
2191         if (adev->gfx.ras) {
2192                 err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2193                 if (err) {
2194                         DRM_ERROR("Failed to register gfx ras block!\n");
2195                         return err;
2196                 }
2197
2198                 strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2199                 adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2200                 adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2201                 adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2202
2203                 /* If no special ras_late_init function is defined, use the default gfx ras_late_init */
2204                 if (!adev->gfx.ras->ras_block.ras_late_init)
2205                         adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2206
2207                 /* If no special ras_cb function is defined, use the default ras_cb */
2208                 if (!adev->gfx.ras->ras_block.ras_cb)
2209                         adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2210         }
2211
2212         adev->gfx.config.gb_addr_config = gb_addr_config;
2213
2214         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2215                         REG_GET_FIELD(
2216                                         adev->gfx.config.gb_addr_config,
2217                                         GB_ADDR_CONFIG,
2218                                         NUM_PIPES);
2219
2220         adev->gfx.config.max_tile_pipes =
2221                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2222
2223         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2224                         REG_GET_FIELD(
2225                                         adev->gfx.config.gb_addr_config,
2226                                         GB_ADDR_CONFIG,
2227                                         NUM_BANKS);
2228         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2229                         REG_GET_FIELD(
2230                                         adev->gfx.config.gb_addr_config,
2231                                         GB_ADDR_CONFIG,
2232                                         MAX_COMPRESSED_FRAGS);
2233         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2234                         REG_GET_FIELD(
2235                                         adev->gfx.config.gb_addr_config,
2236                                         GB_ADDR_CONFIG,
2237                                         NUM_RB_PER_SE);
2238         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2239                         REG_GET_FIELD(
2240                                         adev->gfx.config.gb_addr_config,
2241                                         GB_ADDR_CONFIG,
2242                                         NUM_SHADER_ENGINES);
2243         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2244                         REG_GET_FIELD(
2245                                         adev->gfx.config.gb_addr_config,
2246                                         GB_ADDR_CONFIG,
2247                                         PIPE_INTERLEAVE_SIZE));
2248
2249         return 0;
2250 }
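/*
 * Worked example for the GB_ADDR_CONFIG decode above: every field stores a
 * log2-encoded count, so a raw field value n decodes to 1 << n units.  A
 * NUM_PIPES field of 3 therefore means 8 pipes, a NUM_BANKS field of 4 means
 * 16 banks, and a PIPE_INTERLEAVE_SIZE field of 0 means a
 * 1 << (8 + 0) = 256 byte pipe interleave.
 */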
2251
2252 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2253                                       int mec, int pipe, int queue)
2254 {
2255         unsigned irq_type;
2256         struct amdgpu_ring *ring;
2257         unsigned int hw_prio;
2258
2259         ring = &adev->gfx.compute_ring[ring_id];
2260
2261         /* mec0 is me1 */
2262         ring->me = mec + 1;
2263         ring->pipe = pipe;
2264         ring->queue = queue;
2265
2266         ring->ring_obj = NULL;
2267         ring->use_doorbell = true;
2268         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2269         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2270                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2271         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2272
2273         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2274                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2275                 + ring->pipe;
2276         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2277                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2278         /* type-2 packets are deprecated on MEC, use type-3 instead */
2279         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2280                                 hw_prio, NULL);
2281 }
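/*
 * Illustration of the mapping above (derived from the arithmetic in this
 * function, not from hardware docs): with num_pipe_per_mec = 4, a ring on
 * mec 0 (me 1), pipe 2 gets irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
 * + 2, i.e. the MEC1 pipe 2 EOP source, and its name becomes "comp_1.2.q"
 * for queue q.
 */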
2282
2283 static int gfx_v9_0_sw_init(void *handle)
2284 {
2285         int i, j, k, r, ring_id;
2286         struct amdgpu_ring *ring;
2287         struct amdgpu_kiq *kiq;
2288         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2289
2290         switch (adev->ip_versions[GC_HWIP][0]) {
2291         case IP_VERSION(9, 0, 1):
2292         case IP_VERSION(9, 2, 1):
2293         case IP_VERSION(9, 4, 0):
2294         case IP_VERSION(9, 2, 2):
2295         case IP_VERSION(9, 1, 0):
2296         case IP_VERSION(9, 4, 1):
2297         case IP_VERSION(9, 3, 0):
2298         case IP_VERSION(9, 4, 2):
2299                 adev->gfx.mec.num_mec = 2;
2300                 break;
2301         default:
2302                 adev->gfx.mec.num_mec = 1;
2303                 break;
2304         }
2305
2306         adev->gfx.mec.num_pipe_per_mec = 4;
2307         adev->gfx.mec.num_queue_per_pipe = 8;
2308
2309         /* EOP Event */
2310         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2311         if (r)
2312                 return r;
2313
2314         /* Privileged reg */
2315         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2316                               &adev->gfx.priv_reg_irq);
2317         if (r)
2318                 return r;
2319
2320         /* Privileged inst */
2321         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2322                               &adev->gfx.priv_inst_irq);
2323         if (r)
2324                 return r;
2325
2326         /* ECC error */
2327         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2328                               &adev->gfx.cp_ecc_error_irq);
2329         if (r)
2330                 return r;
2331
2332         /* FUE error */
2333         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2334                               &adev->gfx.cp_ecc_error_irq);
2335         if (r)
2336                 return r;
2337
2338         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2339
2340         gfx_v9_0_scratch_init(adev);
2341
2342         r = gfx_v9_0_init_microcode(adev);
2343         if (r) {
2344                 DRM_ERROR("Failed to load gfx firmware!\n");
2345                 return r;
2346         }
2347
2348         if (adev->gfx.rlc.funcs) {
2349                 if (adev->gfx.rlc.funcs->init) {
2350                         r = adev->gfx.rlc.funcs->init(adev);
2351                         if (r) {
2352                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2353                                 return r;
2354                         }
2355                 }
2356         }
2357
2358         r = gfx_v9_0_mec_init(adev);
2359         if (r) {
2360                 DRM_ERROR("Failed to init MEC BOs!\n");
2361                 return r;
2362         }
2363
2364         /* set up the gfx ring */
2365         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2366                 ring = &adev->gfx.gfx_ring[i];
2367                 ring->ring_obj = NULL;
2368                 if (!i)
2369                         sprintf(ring->name, "gfx");
2370                 else
2371                         sprintf(ring->name, "gfx_%d", i);
2372                 ring->use_doorbell = true;
2373                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2374                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2375                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2376                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2377                 if (r)
2378                         return r;
2379         }
2380
2381         /* set up the compute queues - allocate horizontally across pipes */
2382         ring_id = 0;
2383         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2384                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2385                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2386                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2387                                         continue;
2388
2389                                 r = gfx_v9_0_compute_ring_init(adev,
2390                                                                ring_id,
2391                                                                i, k, j);
2392                                 if (r)
2393                                         return r;
2394
2395                                 ring_id++;
2396                         }
2397                 }
2398         }
2399
2400         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2401         if (r) {
2402                 DRM_ERROR("Failed to init KIQ BOs!\n");
2403                 return r;
2404         }
2405
2406         kiq = &adev->gfx.kiq;
2407         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2408         if (r)
2409                 return r;
2410
2411         /* create MQDs for all compute queues as well as the KIQ for SRIOV case */
2412         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2413         if (r)
2414                 return r;
2415
2416         adev->gfx.ce_ram_size = 0x8000;
2417
2418         r = gfx_v9_0_gpu_early_init(adev);
2419         if (r)
2420                 return r;
2421
2422         return 0;
2423 }
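/*
 * Note on the compute queue setup above: because the queue index is the
 * middle loop and the pipe index is the innermost loop, consecutive ring ids
 * land on different pipes first ("horizontal" allocation).  With 4 pipes and
 * 8 queues per pipe, ring 0 maps to pipe 0 queue 0, ring 1 to pipe 1 queue 0,
 * and so on, assuming all queues are enabled.
 */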
2424
2425
2426 static int gfx_v9_0_sw_fini(void *handle)
2427 {
2428         int i;
2429         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2430
2431         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2432                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2433         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2434                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2435
2436         amdgpu_gfx_mqd_sw_fini(adev);
2437         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2438         amdgpu_gfx_kiq_fini(adev);
2439
2440         gfx_v9_0_mec_fini(adev);
2441         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2442                                 &adev->gfx.rlc.clear_state_gpu_addr,
2443                                 (void **)&adev->gfx.rlc.cs_ptr);
2444         if (adev->flags & AMD_IS_APU) {
2445                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2446                                 &adev->gfx.rlc.cp_table_gpu_addr,
2447                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2448         }
2449         gfx_v9_0_free_microcode(adev);
2450
2451         return 0;
2452 }
2453
2454
2455 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2456 {
2457         /* TODO */
2458 }
2459
2460 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2461                            u32 instance)
2462 {
2463         u32 data;
2464
2465         if (instance == 0xffffffff)
2466                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2467         else
2468                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2469
2470         if (se_num == 0xffffffff)
2471                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2472         else
2473                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2474
2475         if (sh_num == 0xffffffff)
2476                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2477         else
2478                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2479
2480         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2481 }
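/*
 * Usage sketch: passing 0xffffffff for a parameter selects broadcast for that
 * level rather than a single index, so
 *   gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 * restores full broadcast after a per-SE/SH access.  Callers in this file
 * hold adev->grbm_idx_mutex around their select/access/deselect sequences.
 */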
2482
2483 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2484 {
2485         u32 data, mask;
2486
2487         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2488         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2489
2490         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2491         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2492
2493         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2494                                          adev->gfx.config.max_sh_per_se);
2495
2496         return (~data) & mask;
2497 }
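/*
 * Worked example for the bitmap math above: with max_backends_per_se = 4 and
 * max_sh_per_se = 1 the mask is 0xf, so a raw disable field of 0b0010 (one
 * backend fused off) yields an active-RB bitmap of 0b1101.
 */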
2498
2499 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2500 {
2501         int i, j;
2502         u32 data;
2503         u32 active_rbs = 0;
2504         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2505                                         adev->gfx.config.max_sh_per_se;
2506
2507         mutex_lock(&adev->grbm_idx_mutex);
2508         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2509                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2510                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2511                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2512                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2513                                                rb_bitmap_width_per_sh);
2514                 }
2515         }
2516         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2517         mutex_unlock(&adev->grbm_idx_mutex);
2518
2519         adev->gfx.config.backend_enable_mask = active_rbs;
2520         adev->gfx.config.num_rbs = hweight32(active_rbs);
2521 }
2522
2523 #define DEFAULT_SH_MEM_BASES    (0x6000)
2524 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2525 {
2526         int i;
2527         uint32_t sh_mem_config;
2528         uint32_t sh_mem_bases;
2529
2530         /*
2531          * Configure apertures:
2532          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2533          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2534          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2535          */
2536         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2537
2538         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2539                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2540                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2541
2542         mutex_lock(&adev->srbm_mutex);
2543         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2544                 soc15_grbm_select(adev, 0, 0, 0, i);
2545                 /* CP and shaders */
2546                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2547                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2548         }
2549         soc15_grbm_select(adev, 0, 0, 0, 0);
2550         mutex_unlock(&adev->srbm_mutex);
2551
2552         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2553            access. These should be enabled by FW for target VMIDs. */
2554         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2555                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2556                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2557                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2558                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2559         }
2560 }
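/*
 * How DEFAULT_SH_MEM_BASES produces the apertures documented above: the
 * 16-bit base values become bits [63:48] of the aperture address (compare
 * the ">> 48" in gfx_v9_0_constants_init()), so the 0x6000 written to both
 * halves of SH_MEM_BASES places the private and shared apertures at
 * 0x6000'0000'0000'0000 for every compute VMID.
 */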
2561
2562 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2563 {
2564         int vmid;
2565
2566         /*
2567          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2568          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2569          * the driver can enable them for graphics. VMID0 should maintain
2570          * access so that HWS firmware can save/restore entries.
2571          */
2572         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2573                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2574                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2575                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2576                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2577         }
2578 }
2579
2580 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2581 {
2582         uint32_t tmp;
2583
2584         switch (adev->ip_versions[GC_HWIP][0]) {
2585         case IP_VERSION(9, 4, 1):
2586                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2587                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2588                                         DISABLE_BARRIER_WAITCNT, 1);
2589                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2590                 break;
2591         default:
2592                 break;
2593         }
2594 }
2595
2596 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2597 {
2598         u32 tmp;
2599         int i;
2600
2601         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2602
2603         gfx_v9_0_tiling_mode_table_init(adev);
2604
2605         gfx_v9_0_setup_rb(adev);
2606         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2607         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2608
2609         /* XXX SH_MEM regs */
2610         /* where to put LDS, scratch, GPUVM in FSA64 space */
2611         mutex_lock(&adev->srbm_mutex);
2612         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2613                 soc15_grbm_select(adev, 0, 0, 0, i);
2614                 /* CP and shaders */
2615                 if (i == 0) {
2616                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2617                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2618                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2619                                             !!adev->gmc.noretry);
2620                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2621                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2622                 } else {
2623                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2624                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2625                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2626                                             !!adev->gmc.noretry);
2627                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2628                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2629                                 (adev->gmc.private_aperture_start >> 48));
2630                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2631                                 (adev->gmc.shared_aperture_start >> 48));
2632                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2633                 }
2634         }
2635         soc15_grbm_select(adev, 0, 0, 0, 0);
2636
2637         mutex_unlock(&adev->srbm_mutex);
2638
2639         gfx_v9_0_init_compute_vmid(adev);
2640         gfx_v9_0_init_gds_vmid(adev);
2641         gfx_v9_0_init_sq_config(adev);
2642 }
2643
2644 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2645 {
2646         u32 i, j, k;
2647         u32 mask;
2648
2649         mutex_lock(&adev->grbm_idx_mutex);
2650         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2651                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2652                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2653                         for (k = 0; k < adev->usec_timeout; k++) {
2654                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2655                                         break;
2656                                 udelay(1);
2657                         }
2658                         if (k == adev->usec_timeout) {
2659                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2660                                                       0xffffffff, 0xffffffff);
2661                                 mutex_unlock(&adev->grbm_idx_mutex);
2662                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2663                                          i, j);
2664                                 return;
2665                         }
2666                 }
2667         }
2668         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2669         mutex_unlock(&adev->grbm_idx_mutex);
2670
2671         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2672                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2673                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2674                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2675         for (k = 0; k < adev->usec_timeout; k++) {
2676                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2677                         break;
2678                 udelay(1);
2679         }
2680 }
2681
2682 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2683                                                bool enable)
2684 {
2685         u32 tmp;
2686
2687         /* These interrupts should be enabled to drive DS clock */
2688
2689         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2690
2691         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2692         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2693         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2694         if (adev->gfx.num_gfx_rings)
2695                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2696
2697         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2698 }
2699
2700 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2701 {
2702         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2703         /* csib */
2704         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2705                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2706         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2707                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2708         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2709                         adev->gfx.rlc.clear_state_size);
2710 }
2711
2712 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2713                                 int indirect_offset,
2714                                 int list_size,
2715                                 int *unique_indirect_regs,
2716                                 int unique_indirect_reg_count,
2717                                 int *indirect_start_offsets,
2718                                 int *indirect_start_offsets_count,
2719                                 int max_start_offsets_count)
2720 {
2721         int idx;
2722
2723         for (; indirect_offset < list_size; indirect_offset++) {
2724                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2725                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2726                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2727
2728                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2729                         indirect_offset += 2;
2730
2731                         /* look for the matching index */
2732                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2733                                 if (unique_indirect_regs[idx] ==
2734                                         register_list_format[indirect_offset] ||
2735                                         !unique_indirect_regs[idx])
2736                                         break;
2737                         }
2738
2739                         BUG_ON(idx >= unique_indirect_reg_count);
2740
2741                         if (!unique_indirect_regs[idx])
2742                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2743
2744                         indirect_offset++;
2745                 }
2746         }
2747 }
2748
2749 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2750 {
2751         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2752         int unique_indirect_reg_count = 0;
2753
2754         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2755         int indirect_start_offsets_count = 0;
2756
2757         int list_size = 0;
2758         int i = 0, j = 0;
2759         u32 tmp = 0;
2760
2761         u32 *register_list_format =
2762                 kmemdup(adev->gfx.rlc.register_list_format,
2763                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2764         if (!register_list_format)
2765                 return -ENOMEM;
2766
2767         /* setup unique_indirect_regs array and indirect_start_offsets array */
2768         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2769         gfx_v9_1_parse_ind_reg_list(register_list_format,
2770                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2771                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2772                                     unique_indirect_regs,
2773                                     unique_indirect_reg_count,
2774                                     indirect_start_offsets,
2775                                     &indirect_start_offsets_count,
2776                                     ARRAY_SIZE(indirect_start_offsets));
2777
2778         /* enable auto inc in case it is disabled */
2779         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2780         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2781         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2782
2783         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2784         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2785                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2786         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2787                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2788                         adev->gfx.rlc.register_restore[i]);
2789
2790         /* load indirect register */
2791         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2792                 adev->gfx.rlc.reg_list_format_start);
2793
2794         /* direct register portion */
2795         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2796                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2797                         register_list_format[i]);
2798
2799         /* indirect register portion */
2800         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2801                 if (register_list_format[i] == 0xFFFFFFFF) {
2802                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2803                         continue;
2804                 }
2805
2806                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2807                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2808
2809                 for (j = 0; j < unique_indirect_reg_count; j++) {
2810                         if (register_list_format[i] == unique_indirect_regs[j]) {
2811                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2812                                 break;
2813                         }
2814                 }
2815
2816                 BUG_ON(j >= unique_indirect_reg_count);
2817
2818                 i++;
2819         }
2820
2821         /* set save/restore list size */
2822         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2823         list_size = list_size >> 1;
2824         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2825                 adev->gfx.rlc.reg_restore_list_size);
2826         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2827
2828         /* write the starting offsets to RLC scratch ram */
2829         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2830                 adev->gfx.rlc.starting_offsets_start);
2831         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2833                        indirect_start_offsets[i]);
2834
2835         /* load unique indirect regs */
2836         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2837                 if (unique_indirect_regs[i] != 0) {
2838                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2839                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2840                                unique_indirect_regs[i] & 0x3FFFF);
2841
2842                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2843                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2844                                unique_indirect_regs[i] >> 20);
2845                 }
2846         }
2847
2848         kfree(register_list_format);
2849         return 0;
2850 }
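/*
 * Note on the final loop above (inferred from the register writes, not from
 * documentation): each unique indirect register is split across an
 * INDEX_CNTL_ADDR/DATA pair, with the low 18 bits (& 0x3FFFF) going to the
 * ADDR register and the bits above bit 19 (>> 20) to the DATA register.
 */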
2851
2852 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2853 {
2854         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2855 }
2856
2857 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2858                                              bool enable)
2859 {
2860         uint32_t data = 0;
2861         uint32_t default_data = 0;
2862
2863         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2864         if (enable) {
2865                 /* enable GFXIP control over CGPG */
2866                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2867                 if (default_data != data)
2868                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2869
2870                 /* update status */
2871                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2872                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2873                 if (default_data != data)
2874                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2875         } else {
2876                 /* restore GFXIP control over CGPG */
2877                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2878                 if (default_data != data)
2879                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2880         }
2881 }
2882
2883 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2884 {
2885         uint32_t data = 0;
2886
2887         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2888                               AMD_PG_SUPPORT_GFX_SMG |
2889                               AMD_PG_SUPPORT_GFX_DMG)) {
2890                 /* init IDLE_POLL_COUNT = 60 */
2891                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2892                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2893                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2894                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2895
2896                 /* init RLC PG Delay */
2897                 data = 0;
2898                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2899                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2900                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2901                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2902                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2903
2904                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2905                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2906                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2907                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2908
2909                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2910                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2911                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2912                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2913
2914                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2915                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2916
2917                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2918                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2919                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2920                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2921                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2922         }
2923 }
2924
2925 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2926                                                 bool enable)
2927 {
2928         uint32_t data = 0;
2929         uint32_t default_data = 0;
2930
2931         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2932         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2933                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2934                              enable ? 1 : 0);
2935         if (default_data != data)
2936                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2937 }
2938
2939 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2940                                                 bool enable)
2941 {
2942         uint32_t data = 0;
2943         uint32_t default_data = 0;
2944
2945         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2948                              enable ? 1 : 0);
2949         if (default_data != data)
2950                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2951 }
2952
2953 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2954                                         bool enable)
2955 {
2956         uint32_t data = 0;
2957         uint32_t default_data = 0;
2958
2959         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2960         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2961                              CP_PG_DISABLE,
2962                              enable ? 0 : 1);
2963         if (default_data != data)
2964                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2965 }
2966
2967 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2968                                                 bool enable)
2969 {
2970         uint32_t data, default_data;
2971
2972         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2973         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2974                              GFX_POWER_GATING_ENABLE,
2975                              enable ? 1 : 0);
2976         if (default_data != data)
2977                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2978 }
2979
2980 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2981                                                 bool enable)
2982 {
2983         uint32_t data, default_data;
2984
2985         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2986         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2987                              GFX_PIPELINE_PG_ENABLE,
2988                              enable ? 1 : 0);
2989         if (default_data != data)
2990                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2991
2992         if (!enable)
2993                 /* read any GFX register to wake up GFX */
2994                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2995 }
2996
2997 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2998                                                        bool enable)
2999 {
3000         uint32_t data, default_data;
3001
3002         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3003         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3004                              STATIC_PER_CU_PG_ENABLE,
3005                              enable ? 1 : 0);
3006         if (default_data != data)
3007                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3008 }
3009
3010 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3011                                                 bool enable)
3012 {
3013         uint32_t data, default_data;
3014
3015         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3016         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3017                              DYN_PER_CU_PG_ENABLE,
3018                              enable ? 1 : 0);
3019         if (default_data != data)
3020                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3021 }
3022
3023 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3024 {
3025         gfx_v9_0_init_csb(adev);
3026
3027         /*
3028          * The RLC save/restore list is usable since RLC v2_1,
3029          * and it is required by the gfxoff feature.
3030          */
3031         if (adev->gfx.rlc.is_rlc_v2_1) {
3032                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3033                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3034                         gfx_v9_1_init_rlc_save_restore_list(adev);
3035                 gfx_v9_0_enable_save_restore_machine(adev);
3036         }
3037
3038         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3039                               AMD_PG_SUPPORT_GFX_SMG |
3040                               AMD_PG_SUPPORT_GFX_DMG |
3041                               AMD_PG_SUPPORT_CP |
3042                               AMD_PG_SUPPORT_GDS |
3043                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3044                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3045                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3046                 gfx_v9_0_init_gfx_power_gating(adev);
3047         }
3048 }
3049
3050 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3051 {
3052         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3053         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3054         gfx_v9_0_wait_for_rlc_serdes(adev);
3055 }
3056
3057 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3058 {
3059         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3060         udelay(50);
3061         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3062         udelay(50);
3063 }
3064
3065 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3066 {
3067 #ifdef AMDGPU_RLC_DEBUG_RETRY
3068         u32 rlc_ucode_ver;
3069 #endif
3070
3071         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3072         udelay(50);
3073
3074         /* APUs (e.g. carrizo) enable the cp interrupt after the cp is initialized */
3075         if (!(adev->flags & AMD_IS_APU)) {
3076                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3077                 udelay(50);
3078         }
3079
3080 #ifdef AMDGPU_RLC_DEBUG_RETRY
3081         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3082         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3083         if (rlc_ucode_ver == 0x108) {
3084                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3085                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3086                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3087                  * default is 0x9C4 to create a 100us interval */
3088                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3089                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3090                  * to disable the page fault retry interrupts, default is
3091                  * 0x100 (256) */
3092                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3093         }
3094 #endif
3095 }
3096
3097 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3098 {
3099         const struct rlc_firmware_header_v2_0 *hdr;
3100         const __le32 *fw_data;
3101         unsigned i, fw_size;
3102
3103         if (!adev->gfx.rlc_fw)
3104                 return -EINVAL;
3105
3106         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3107         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3108
3109         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3110                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3111         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3112
3113         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3114                         RLCG_UCODE_LOADING_START_ADDRESS);
3115         for (i = 0; i < fw_size; i++)
3116                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3117         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3118
3119         return 0;
3120 }
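/*
 * The load above follows the usual ADDR/DATA streaming pattern: point
 * RLC_GPM_UCODE_ADDR at the start offset, stream each ucode dword through
 * RLC_GPM_UCODE_DATA (the address presumably auto-increments, as only the
 * data register is written in the loop), then write the firmware version
 * back to the ADDR register as the final step.
 */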
3121
3122 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3123 {
3124         int r;
3125
3126         if (amdgpu_sriov_vf(adev)) {
3127                 gfx_v9_0_init_csb(adev);
3128                 return 0;
3129         }
3130
3131         adev->gfx.rlc.funcs->stop(adev);
3132
3133         /* disable CG */
3134         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3135
3136         gfx_v9_0_init_pg(adev);
3137
3138         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3139                 /* legacy rlc firmware loading */
3140                 r = gfx_v9_0_rlc_load_microcode(adev);
3141                 if (r)
3142                         return r;
3143         }
3144
3145         switch (adev->ip_versions[GC_HWIP][0]) {
3146         case IP_VERSION(9, 2, 2):
3147         case IP_VERSION(9, 1, 0):
3148                 if (amdgpu_lbpw == 0)
3149                         gfx_v9_0_enable_lbpw(adev, false);
3150                 else
3151                         gfx_v9_0_enable_lbpw(adev, true);
3152                 break;
3153         case IP_VERSION(9, 4, 0):
3154                 if (amdgpu_lbpw > 0)
3155                         gfx_v9_0_enable_lbpw(adev, true);
3156                 else
3157                         gfx_v9_0_enable_lbpw(adev, false);
3158                 break;
3159         default:
3160                 break;
3161         }
3162
3163         adev->gfx.rlc.funcs->start(adev);
3164
3165         return 0;
3166 }
3167
3168 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3169 {
3170         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3171
3172         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3173         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3174         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3175         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3176         udelay(50);
3177 }
3178
3179 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3180 {
3181         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3182         const struct gfx_firmware_header_v1_0 *ce_hdr;
3183         const struct gfx_firmware_header_v1_0 *me_hdr;
3184         const __le32 *fw_data;
3185         unsigned i, fw_size;
3186
3187         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3188                 return -EINVAL;
3189
3190         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3191                 adev->gfx.pfp_fw->data;
3192         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3193                 adev->gfx.ce_fw->data;
3194         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3195                 adev->gfx.me_fw->data;
3196
3197         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3198         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3199         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3200
3201         gfx_v9_0_cp_gfx_enable(adev, false);
3202
3203         /* PFP */
3204         fw_data = (const __le32 *)
3205                 (adev->gfx.pfp_fw->data +
3206                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3207         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3208         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3209         for (i = 0; i < fw_size; i++)
3210                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3211         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3212
3213         /* CE */
3214         fw_data = (const __le32 *)
3215                 (adev->gfx.ce_fw->data +
3216                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3217         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3218         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3219         for (i = 0; i < fw_size; i++)
3220                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3221         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3222
3223         /* ME */
3224         fw_data = (const __le32 *)
3225                 (adev->gfx.me_fw->data +
3226                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3227         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3228         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3229         for (i = 0; i < fw_size; i++)
3230                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3231         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3232
3233         return 0;
3234 }
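/*
 * PFP, CE and ME are loaded with the same three-step pattern as the RLC
 * above: reset the write address to 0, stream the ucode dwords, then store
 * the firmware version in the address register.  Only the register names
 * differ (ME uses CP_ME_RAM_WADDR/CP_ME_RAM_DATA instead of UCODE_ADDR/DATA).
 */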
3235
3236 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3237 {
3238         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3239         const struct cs_section_def *sect = NULL;
3240         const struct cs_extent_def *ext = NULL;
3241         int r, i, tmp;
3242
3243         /* init the CP */
3244         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3245         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3246
3247         gfx_v9_0_cp_gfx_enable(adev, true);
3248
3249         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3250         if (r) {
3251                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3252                 return r;
3253         }
3254
3255         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3256         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3257
3258         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3259         amdgpu_ring_write(ring, 0x80000000);
3260         amdgpu_ring_write(ring, 0x80000000);
3261
3262         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3263                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3264                         if (sect->id == SECT_CONTEXT) {
3265                                 amdgpu_ring_write(ring,
3266                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3267                                                ext->reg_count));
3268                                 amdgpu_ring_write(ring,
3269                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3270                                 for (i = 0; i < ext->reg_count; i++)
3271                                         amdgpu_ring_write(ring, ext->extent[i]);
3272                         }
3273                 }
3274         }
3275
3276         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3277         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3278
3279         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3280         amdgpu_ring_write(ring, 0);
3281
3282         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3283         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3284         amdgpu_ring_write(ring, 0x8000);
3285         amdgpu_ring_write(ring, 0x8000);
3286
3287         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3288         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3289                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3290         amdgpu_ring_write(ring, tmp);
3291         amdgpu_ring_write(ring, 0);
3292
3293         amdgpu_ring_commit(ring);
3294
3295         return 0;
3296 }
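/*
 * Sizing note for the amdgpu_ring_alloc() call above: the "+ 4 + 3" on top
 * of the CSB size matches the two trailing packets emitted after the clear
 * state, a 4-dword PACKET3_SET_BASE and a 3-dword PACKET3_SET_UCONFIG_REG.
 */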
3297
3298 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3299 {
3300         struct amdgpu_ring *ring;
3301         u32 tmp;
3302         u32 rb_bufsz;
3303         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3304
3305         /* Set the write pointer delay */
3306         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3307
3308         /* set the RB to use vmid 0 */
3309         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3310
3311         /* Set ring buffer size */
3312         ring = &adev->gfx.gfx_ring[0];
3313         rb_bufsz = order_base_2(ring->ring_size / 8);
3314         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3315         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3316 #ifdef __BIG_ENDIAN
3317         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3318 #endif
3319         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3320
3321         /* Initialize the ring buffer's write pointers */
3322         ring->wptr = 0;
3323         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3324         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3325
3326         /* set the wb address whether it's enabled or not */
3327         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3328         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3329         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3330
3331         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3332         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3333         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3334
3335         mdelay(1);
3336         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3337
3338         rb_addr = ring->gpu_addr >> 8;
3339         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3340         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3341
3342         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3343         if (ring->use_doorbell) {
3344                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3345                                     DOORBELL_OFFSET, ring->doorbell_index);
3346                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3347                                     DOORBELL_EN, 1);
3348         } else {
3349                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3350         }
3351         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3352
3353         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3354                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3355         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3356
3357         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3358                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3359
3360
3361         /* start the ring */
3362         gfx_v9_0_cp_gfx_start(adev);
3363         ring->sched.ready = true;
3364
3365         return 0;
3366 }
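/*
 * Example of the ring-buffer sizing above: for a 4 KiB ring,
 * order_base_2(4096 / 8) = 9, so RB_BUFSZ is written as 9 and RB_BLKSZ as 7.
 * The CP_RB0_CNTL fields are log2 encoded, mirroring the GB_ADDR_CONFIG
 * convention used earlier in this file.
 */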
3367
3368 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3369 {
3370         if (enable) {
3371                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3372         } else {
3373                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3374                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3375                 adev->gfx.kiq.ring.sched.ready = false;
3376         }
3377         udelay(50);
3378 }
3379
3380 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3381 {
3382         const struct gfx_firmware_header_v1_0 *mec_hdr;
3383         const __le32 *fw_data;
3384         unsigned i;
3385         u32 tmp;
3386
3387         if (!adev->gfx.mec_fw)
3388                 return -EINVAL;
3389
3390         gfx_v9_0_cp_compute_enable(adev, false);
3391
3392         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3393         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3394
3395         fw_data = (const __le32 *)
3396                 (adev->gfx.mec_fw->data +
3397                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3398         tmp = 0;
3399         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3400         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3401         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3402
3403         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3404                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3405         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3406                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3407
3408         /* MEC1 */
3409         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3410                          mec_hdr->jt_offset);
3411         for (i = 0; i < mec_hdr->jt_size; i++)
3412                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3413                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3414
3415         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3416                         adev->gfx.mec_fw_version);
3417         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3418
3419         return 0;
3420 }
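/*
 * Unlike the gfx engines, only the MEC jump table is streamed through the
 * UCODE_ADDR/DATA pair here; the bulk of the MEC ucode is fetched by the CP
 * itself from mec_fw_gpu_addr, which is what the CP_CPC_IC_BASE_LO/HI writes
 * above set up.
 */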
3421
3422 /* KIQ functions */
3423 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3424 {
3425         uint32_t tmp;
3426         struct amdgpu_device *adev = ring->adev;
3427
3428         /* tell RLC which is KIQ queue */
3429         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3430         tmp &= 0xffffff00;
3431         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3432         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3433         tmp |= 0x80;
3434         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3435 }
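/*
 * The RLC_CP_SCHEDULERS encoding used above packs the KIQ location into the
 * low byte as (me << 5) | (pipe << 3) | queue; the second write then sets
 * bit 7 (0x80), which appears to mark the entry as valid, as a separate
 * step from programming the queue location bits.
 */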
3436
3437 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3438 {
3439         struct amdgpu_device *adev = ring->adev;
3440
3441         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3442                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3443                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3444                         mqd->cp_hqd_queue_priority =
3445                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3446                 }
3447         }
3448 }
3449
3450 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3451 {
3452         struct amdgpu_device *adev = ring->adev;
3453         struct v9_mqd *mqd = ring->mqd_ptr;
3454         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3455         uint32_t tmp;
3456
3457         mqd->header = 0xC0310800;
3458         mqd->compute_pipelinestat_enable = 0x00000001;
3459         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3460         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3461         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3462         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3463         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3464         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3465         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3466         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3467         mqd->compute_misc_reserved = 0x00000003;
3468
3469         mqd->dynamic_cu_mask_addr_lo =
3470                 lower_32_bits(ring->mqd_gpu_addr
3471                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3472         mqd->dynamic_cu_mask_addr_hi =
3473                 upper_32_bits(ring->mqd_gpu_addr
3474                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3475
3476         eop_base_addr = ring->eop_gpu_addr >> 8;
3477         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3478         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3479
3480         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3481         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3482         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3483                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3484
3485         mqd->cp_hqd_eop_control = tmp;
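             /* Worked example (sketch): with GFX9_MEC_HPD_SIZE = 4096 bytes,
              * the EOP buffer holds 4096 / 4 = 1024 dwords, so
              * order_base_2(1024) - 1 = 9 is programmed and the hardware
              * decodes 2^(9+1) = 1024 dwords, matching the allocation.
              */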
3486
3487         /* enable doorbell? */
3488         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3489
3490         if (ring->use_doorbell) {
3491                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3492                                     DOORBELL_OFFSET, ring->doorbell_index);
3493                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3494                                     DOORBELL_EN, 1);
3495                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3496                                     DOORBELL_SOURCE, 0);
3497                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3498                                     DOORBELL_HIT, 0);
3499         } else {
3500                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3501                                          DOORBELL_EN, 0);
3502         }
3503
3504         mqd->cp_hqd_pq_doorbell_control = tmp;
3505
3506         /* disable the queue if it's active */
3507         ring->wptr = 0;
3508         mqd->cp_hqd_dequeue_request = 0;
3509         mqd->cp_hqd_pq_rptr = 0;
3510         mqd->cp_hqd_pq_wptr_lo = 0;
3511         mqd->cp_hqd_pq_wptr_hi = 0;
3512
3513         /* set the pointer to the MQD */
3514         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3515         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3516
3517         /* set MQD vmid to 0 */
3518         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3519         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3520         mqd->cp_mqd_control = tmp;
3521
3522         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3523         hqd_gpu_addr = ring->gpu_addr >> 8;
3524         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3525         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3526
3527         /* set up the HQD, this is similar to CP_RB0_CNTL */
3528         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3529         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3530                             (order_base_2(ring->ring_size / 4) - 1));
3531         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3532                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3533 #ifdef __BIG_ENDIAN
3534         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3535 #endif
3536         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3537         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3538         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3539         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3540         mqd->cp_hqd_pq_control = tmp;
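             /* Worked example (sketch): for a hypothetical 64 KiB ring,
              * ring->ring_size / 4 = 16384 dwords, so QUEUE_SIZE is
              * programmed as order_base_2(16384) - 1 = 13, following the
              * same 2^(n+1) dword encoding noted for CP_HQD_EOP_CONTROL
              * above.
              */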
3541
3542         /* set the wb address whether it's enabled or not */
3543         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3544         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3545         mqd->cp_hqd_pq_rptr_report_addr_hi =
3546                 upper_32_bits(wb_gpu_addr) & 0xffff;
3547
3548         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3549         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3550         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3551         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3552
3553         tmp = 0;
3554         /* enable the doorbell if requested */
3555         if (ring->use_doorbell) {
3556                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3557                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3558                                 DOORBELL_OFFSET, ring->doorbell_index);
3559
3560                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3561                                          DOORBELL_EN, 1);
3562                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3563                                          DOORBELL_SOURCE, 0);
3564                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3565                                          DOORBELL_HIT, 0);
3566         }
3567
3568         mqd->cp_hqd_pq_doorbell_control = tmp;
3569
3570         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3571         ring->wptr = 0;
3572         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3573
3574         /* set the vmid for the queue */
3575         mqd->cp_hqd_vmid = 0;
3576
3577         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3578         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3579         mqd->cp_hqd_persistent_state = tmp;
3580
3581         /* set MIN_IB_AVAIL_SIZE */
3582         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3583         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3584         mqd->cp_hqd_ib_control = tmp;
3585
3586         /* set static priority for a queue/ring */
3587         gfx_v9_0_mqd_set_priority(ring, mqd);
3588         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3589
3590         /* The map_queues packet doesn't need to activate the queue,
3591          * so only the KIQ needs to set this field.
3592          */
3593         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3594                 mqd->cp_hqd_active = 1;
3595
3596         return 0;
3597 }
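
     /* Note: callers such as gfx_v9_0_kiq_init_queue() and
      * gfx_v9_0_kcq_init_queue() below select the target queue via
      * soc15_grbm_select() under srbm_mutex before calling this, since the
      * CP_HQD_ and CP_MQD_ register reads above address the currently
      * selected HQD.
      */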
3598
3599 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3600 {
3601         struct amdgpu_device *adev = ring->adev;
3602         struct v9_mqd *mqd = ring->mqd_ptr;
3603         int j;
3604
3605         /* disable wptr polling */
3606         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3607
3608         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3609                mqd->cp_hqd_eop_base_addr_lo);
3610         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3611                mqd->cp_hqd_eop_base_addr_hi);
3612
3613         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3614         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3615                mqd->cp_hqd_eop_control);
3616
3617         /* enable doorbell? */
3618         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3619                mqd->cp_hqd_pq_doorbell_control);
3620
3621         /* disable the queue if it's active */
3622         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3623                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3624                 for (j = 0; j < adev->usec_timeout; j++) {
3625                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3626                                 break;
3627                         udelay(1);
3628                 }
3629                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3630                        mqd->cp_hqd_dequeue_request);
3631                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3632                        mqd->cp_hqd_pq_rptr);
3633                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3634                        mqd->cp_hqd_pq_wptr_lo);
3635                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3636                        mqd->cp_hqd_pq_wptr_hi);
3637         }
3638
3639         /* set the pointer to the MQD */
3640         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3641                mqd->cp_mqd_base_addr_lo);
3642         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3643                mqd->cp_mqd_base_addr_hi);
3644
3645         /* set MQD vmid to 0 */
3646         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3647                mqd->cp_mqd_control);
3648
3649         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3650         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3651                mqd->cp_hqd_pq_base_lo);
3652         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3653                mqd->cp_hqd_pq_base_hi);
3654
3655         /* set up the HQD, this is similar to CP_RB0_CNTL */
3656         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3657                mqd->cp_hqd_pq_control);
3658
3659         /* set the wb address whether it's enabled or not */
3660         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3661                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3662         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3663                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3664
3665         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3666         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3667                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3669                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3670
3671         /* enable the doorbell if requested */
3672         if (ring->use_doorbell) {
3673                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3674                                         (adev->doorbell_index.kiq * 2) << 2);
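                     /* Unit note (inferred from the arithmetic here): the
                      * doorbell_index values count 64-bit doorbells, "* 2"
                      * converts them to 32-bit doorbell slots, and "<< 2"
                      * turns a slot index into the byte offset the range
                      * registers expect.
                      */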
3675                 /* If GC has entered CGPG, ringing a doorbell beyond the
3676                  * first page does not wake GC up. Enlarge
3677                  * CP_MEC_DOORBELL_RANGE_UPPER to work around this; the
3678                  * change has to stay aligned with the firmware update.
3679                  */
3680                 if (check_if_enlarge_doorbell_range(adev))
3681                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3682                                         (adev->doorbell.size - 4));
3683                 else
3684                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3685                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3686         }
3687
3688         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3689                mqd->cp_hqd_pq_doorbell_control);
3690
3691         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3692         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3693                mqd->cp_hqd_pq_wptr_lo);
3694         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3695                mqd->cp_hqd_pq_wptr_hi);
3696
3697         /* set the vmid for the queue */
3698         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3699
3700         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3701                mqd->cp_hqd_persistent_state);
3702
3703         /* activate the queue */
3704         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3705                mqd->cp_hqd_active);
3706
3707         if (ring->use_doorbell)
3708                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3709
3710         return 0;
3711 }
3712
3713 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3714 {
3715         struct amdgpu_device *adev = ring->adev;
3716         int j;
3717
3718         /* disable the queue if it's active */
3719         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3720
3721                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3722
3723                 for (j = 0; j < adev->usec_timeout; j++) {
3724                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3725                                 break;
3726                         udelay(1);
3727                 }
3728
3729                 if (j == adev->usec_timeout) {
3730                         DRM_DEBUG("KIQ dequeue request failed.\n");
3731
3732                         /* Manual disable if dequeue request times out */
3733                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3734                 }
3735
3736                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
3738         }
3739
3740         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3741         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3742         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3743         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3744         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3745         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3746         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3747         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3748
3749         return 0;
3750 }
3751
3752 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3753 {
3754         struct amdgpu_device *adev = ring->adev;
3755         struct v9_mqd *mqd = ring->mqd_ptr;
3756         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3757         struct v9_mqd *tmp_mqd;
3758
3759         gfx_v9_0_kiq_setting(ring);
3760
3761         /* The GPU could be in a bad state during probe: the driver may
3762          * trigger a reset after loading the SMU, in which case the MQD is
3763          * not yet initialized and must be re-initialized. Check
3764          * mqd->cp_hqd_pq_control, since it should never be 0 once set up.
3765          */
3766         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3767         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3768                 /* for the GPU_RESET case, reset the MQD to a clean state */
3769                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3770                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3771
3772                 /* reset ring buffer */
3773                 ring->wptr = 0;
3774                 amdgpu_ring_clear_ring(ring);
3775
3776                 mutex_lock(&adev->srbm_mutex);
3777                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3778                 gfx_v9_0_kiq_init_register(ring);
3779                 soc15_grbm_select(adev, 0, 0, 0, 0);
3780                 mutex_unlock(&adev->srbm_mutex);
3781         } else {
3782                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3783                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3784                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3785                 mutex_lock(&adev->srbm_mutex);
3786                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3787                 gfx_v9_0_mqd_init(ring);
3788                 gfx_v9_0_kiq_init_register(ring);
3789                 soc15_grbm_select(adev, 0, 0, 0, 0);
3790                 mutex_unlock(&adev->srbm_mutex);
3791
3792                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3793                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3794         }
3795
3796         return 0;
3797 }
3798
3799 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3800 {
3801         struct amdgpu_device *adev = ring->adev;
3802         struct v9_mqd *mqd = ring->mqd_ptr;
3803         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3804         struct v9_mqd *tmp_mqd;
3805
3806         /* As with the KIQ init above, the driver needs to re-init the MQD
3807          * if mqd->cp_hqd_pq_control shows it was never initialized.
3808          */
3809         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3810
3811         if (!tmp_mqd->cp_hqd_pq_control ||
3812             (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3813                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3814                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3815                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3816                 mutex_lock(&adev->srbm_mutex);
3817                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3818                 gfx_v9_0_mqd_init(ring);
3819                 soc15_grbm_select(adev, 0, 0, 0, 0);
3820                 mutex_unlock(&adev->srbm_mutex);
3821
3822                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3823                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3824         } else if (amdgpu_in_reset(adev)) { /* for the GPU_RESET case */
3825                 /* reset the MQD to a clean state */
3826                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3827                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3828
3829                 /* reset ring buffer */
3830                 ring->wptr = 0;
3831                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3832                 amdgpu_ring_clear_ring(ring);
3833         } else {
3834                 amdgpu_ring_clear_ring(ring);
3835         }
3836
3837         return 0;
3838 }
3839
3840 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3841 {
3842         struct amdgpu_ring *ring;
3843         int r;
3844
3845         ring = &adev->gfx.kiq.ring;
3846
3847         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3848         if (unlikely(r != 0))
3849                 return r;
3850
3851         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3852         if (unlikely(r != 0)) {
                     /* don't leak the reservation taken above on this error path */
                     amdgpu_bo_unreserve(ring->mqd_obj);
3853                 return r;
             }
3854
3855         gfx_v9_0_kiq_init_queue(ring);
3856         amdgpu_bo_kunmap(ring->mqd_obj);
3857         ring->mqd_ptr = NULL;
3858         amdgpu_bo_unreserve(ring->mqd_obj);
3859         ring->sched.ready = true;
3860         return 0;
3861 }
3862
3863 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3864 {
3865         struct amdgpu_ring *ring = NULL;
3866         int r = 0, i;
3867
3868         gfx_v9_0_cp_compute_enable(adev, true);
3869
3870         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3871                 ring = &adev->gfx.compute_ring[i];
3872
3873                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3874                 if (unlikely(r != 0))
3875                         goto done;
3876                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3877                 if (!r) {
3878                         r = gfx_v9_0_kcq_init_queue(ring);
3879                         amdgpu_bo_kunmap(ring->mqd_obj);
3880                         ring->mqd_ptr = NULL;
3881                 }
3882                 amdgpu_bo_unreserve(ring->mqd_obj);
3883                 if (r)
3884                         goto done;
3885         }
3886
3887         r = amdgpu_gfx_enable_kcq(adev);
3888 done:
3889         return r;
3890 }
3891
3892 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3893 {
3894         int r, i;
3895         struct amdgpu_ring *ring;
3896
3897         if (!(adev->flags & AMD_IS_APU))
3898                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3899
3900         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3901                 if (adev->gfx.num_gfx_rings) {
3902                         /* legacy firmware loading */
3903                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3904                         if (r)
3905                                 return r;
3906                 }
3907
3908                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3909                 if (r)
3910                         return r;
3911         }
3912
3913         r = gfx_v9_0_kiq_resume(adev);
3914         if (r)
3915                 return r;
3916
3917         if (adev->gfx.num_gfx_rings) {
3918                 r = gfx_v9_0_cp_gfx_resume(adev);
3919                 if (r)
3920                         return r;
3921         }
3922
3923         r = gfx_v9_0_kcq_resume(adev);
3924         if (r)
3925                 return r;
3926
3927         if (adev->gfx.num_gfx_rings) {
3928                 ring = &adev->gfx.gfx_ring[0];
3929                 r = amdgpu_ring_test_helper(ring);
3930                 if (r)
3931                         return r;
3932         }
3933
3934         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3935                 ring = &adev->gfx.compute_ring[i];
3936                 amdgpu_ring_test_helper(ring);
3937         }
3938
3939         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3940
3941         return 0;
3942 }
3943
3944 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3945 {
3946         u32 tmp;
3947
3948         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3949             adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3950                 return;
3951
3952         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3953         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3954                                 adev->df.hash_status.hash_64k);
3955         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3956                                 adev->df.hash_status.hash_2m);
3957         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3958                                 adev->df.hash_status.hash_1g);
3959         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3960 }
3961
3962 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3963 {
3964         if (adev->gfx.num_gfx_rings)
3965                 gfx_v9_0_cp_gfx_enable(adev, enable);
3966         gfx_v9_0_cp_compute_enable(adev, enable);
3967 }
3968
3969 static int gfx_v9_0_hw_init(void *handle)
3970 {
3971         int r;
3972         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3973
3974         if (!amdgpu_sriov_vf(adev))
3975                 gfx_v9_0_init_golden_registers(adev);
3976
3977         gfx_v9_0_constants_init(adev);
3978
3979         gfx_v9_0_init_tcp_config(adev);
3980
3981         r = adev->gfx.rlc.funcs->resume(adev);
3982         if (r)
3983                 return r;
3984
3985         r = gfx_v9_0_cp_resume(adev);
3986         if (r)
3987                 return r;
3988
3989         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3990                 gfx_v9_4_2_set_power_brake_sequence(adev);
3991
3992         return r;
3993 }
3994
3995 static int gfx_v9_0_hw_fini(void *handle)
3996 {
3997         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3998
3999         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4000         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4001         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4002
4003         /* DF freeze and KCQ disable would fail after a RAS fatal-error interrupt */
4004         if (!amdgpu_ras_intr_triggered())
4005                 /* disable KCQ so the CPC stops touching memory that is no longer valid */
4006                 amdgpu_gfx_disable_kcq(adev);
4007
4008         if (amdgpu_sriov_vf(adev)) {
4009                 gfx_v9_0_cp_gfx_enable(adev, false);
4010                 /* Polling must be disabled for SRIOV once the hw has
4011                  * finished; otherwise the CPC engine may keep fetching a WB
4012                  * address that is no longer valid after the sw side is done,
4013                  * triggering DMAR read errors on the hypervisor side.
4014                  */
4015                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4016                 return 0;
4017         }
4018
4019         /* Use the deinitialization sequence from CAIL when unbinding the
4020          * device from the driver; otherwise the KIQ hangs when binding back.
4021          */
4022         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4023                 mutex_lock(&adev->srbm_mutex);
4024                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4025                                 adev->gfx.kiq.ring.pipe,
4026                                 adev->gfx.kiq.ring.queue, 0);
4027                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4028                 soc15_grbm_select(adev, 0, 0, 0, 0);
4029                 mutex_unlock(&adev->srbm_mutex);
4030         }
4031
4032         gfx_v9_0_cp_enable(adev, false);
4033
4034         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4035         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4036             (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4037                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4038                 return 0;
4039         }
4040
4041         adev->gfx.rlc.funcs->stop(adev);
4042         return 0;
4043 }
4044
4045 static int gfx_v9_0_suspend(void *handle)
4046 {
4047         return gfx_v9_0_hw_fini(handle);
4048 }
4049
4050 static int gfx_v9_0_resume(void *handle)
4051 {
4052         return gfx_v9_0_hw_init(handle);
4053 }
4054
4055 static bool gfx_v9_0_is_idle(void *handle)
4056 {
4057         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4058
4059         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4060                                 GRBM_STATUS, GUI_ACTIVE))
4061                 return false;
4062         else
4063                 return true;
4064 }
4065
4066 static int gfx_v9_0_wait_for_idle(void *handle)
4067 {
4068         unsigned i;
4069         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4070
4071         for (i = 0; i < adev->usec_timeout; i++) {
4072                 if (gfx_v9_0_is_idle(handle))
4073                         return 0;
4074                 udelay(1);
4075         }
4076         return -ETIMEDOUT;
4077 }
4078
4079 static int gfx_v9_0_soft_reset(void *handle)
4080 {
4081         u32 grbm_soft_reset = 0;
4082         u32 tmp;
4083         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4084
4085         /* GRBM_STATUS */
4086         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4087         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4088                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4089                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4090                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4091                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4092                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4093                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4094                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4095                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4096                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4097         }
4098
4099         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4100                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4101                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4102         }
4103
4104         /* GRBM_STATUS2 */
4105         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4106         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4107                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4108                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4109
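             /* The reset sequence below follows the usual soft-reset protocol:
              * OR the reset bits into GRBM_SOFT_RESET, read back to post the
              * write, hold reset for ~50 us, clear the bits, read back again,
              * then give the hardware time to settle.
              */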
4111         if (grbm_soft_reset) {
4112                 /* stop the rlc */
4113                 adev->gfx.rlc.funcs->stop(adev);
4114
4115                 if (adev->gfx.num_gfx_rings)
4116                         /* Disable GFX parsing/prefetching */
4117                         gfx_v9_0_cp_gfx_enable(adev, false);
4118
4119                 /* Disable MEC parsing/prefetching */
4120                 gfx_v9_0_cp_compute_enable(adev, false);
4121
4123                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4124                 tmp |= grbm_soft_reset;
4125                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4126                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4127                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4128
4129                 udelay(50);
4130
4131                 tmp &= ~grbm_soft_reset;
4132                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4133                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4135
4136                 /* Wait a little for things to settle down */
4137                 udelay(50);
4138         }
4139         return 0;
4140 }
4141
4142 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4143 {
4144         signed long r, cnt = 0;
4145         unsigned long flags;
4146         uint32_t seq, reg_val_offs = 0;
4147         uint64_t value = 0;
4148         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4149         struct amdgpu_ring *ring = &kiq->ring;
4150
4151         BUG_ON(!ring->funcs->emit_rreg);
4152
4153         spin_lock_irqsave(&kiq->ring_lock, flags);
4154         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4155                 pr_err("critical bug! too many kiq readers\n");
4156                 goto failed_unlock;
4157         }
4158         amdgpu_ring_alloc(ring, 32);
4159         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4160         amdgpu_ring_write(ring, 9 |     /* src: register*/
4161                                 (5 << 8) |      /* dst: memory */
4162                                 (1 << 16) |     /* count sel */
4163                                 (1 << 20));     /* write confirm */
4164         amdgpu_ring_write(ring, 0);
4165         amdgpu_ring_write(ring, 0);
4166         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4167                                 reg_val_offs * 4));
4168         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4169                                 reg_val_offs * 4));
4170         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4171         if (r)
4172                 goto failed_undo;
4173
4174         amdgpu_ring_commit(ring);
4175         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4176
4177         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4178
4179         /* Don't keep waiting in the GPU-reset case, because doing so may
4180          * block the gpu_recover() routine forever: e.g. when this kiq read
4181          * is triggered from TTM, ttm_bo_lock_delayed_workqueue() will never
4182          * return while we keep waiting here, which hangs gpu_recover().
4183          *
4184          * Likewise, don't keep waiting when called from IRQ context.
4185          */
4187         if (r < 1 && (amdgpu_in_reset(adev)))
4188                 goto failed_kiq_read;
4189
4190         might_sleep();
4191         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4192                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4193                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4194         }
4195
4196         if (cnt > MAX_KIQ_REG_TRY)
4197                 goto failed_kiq_read;
4198
4199         mb();
4200         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4201                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4202         amdgpu_device_wb_free(adev, reg_val_offs);
4203         return value;
4204
4205 failed_undo:
4206         amdgpu_ring_undo(ring);
4207 failed_unlock:
4208         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4209 failed_kiq_read:
4210         if (reg_val_offs)
4211                 amdgpu_device_wb_free(adev, reg_val_offs);
4212         pr_err("failed to read gpu clock\n");
4213         return ~0;
4214 }
4215
4216 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4217 {
4218         uint64_t clock, clock_lo, clock_hi, hi_check;
4219
4220         switch (adev->ip_versions[GC_HWIP][0]) {
4221         case IP_VERSION(9, 3, 0):
4222                 preempt_disable();
4223                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4224                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4225                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4226                 /* The SMUIO TSC runs at 100 MHz, so its 32-bit low word
4227                  * wraps roughly every 43 seconds (2^32 cycles / 100 MHz).
4228                  */
4229                 if (hi_check != clock_hi) {
4230                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4231                         clock_hi = hi_check;
4232                 }
4233                 preempt_enable();
4234                 clock = clock_lo | (clock_hi << 32ULL);
4235                 break;
4236         default:
4237                 amdgpu_gfx_off_ctrl(adev, false);
4238                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4239                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4240                         clock = gfx_v9_0_kiq_read_clock(adev);
4241                 } else {
4242                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4243                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4244                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4245                 }
4246                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4247                 amdgpu_gfx_off_ctrl(adev, true);
4248                 break;
4249         }
4250         return clock;
4251 }
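
     /* Generic sketch of the hi/lo/hi pattern used in the Renoir path above
      * for a tear-free read of a split 64-bit counter. rd_hi/rd_lo are
      * hypothetical accessors for illustration only, not driver APIs.
      */
     static inline u64 read_split_counter64_sketch(u32 (*rd_hi)(void),
                                                   u32 (*rd_lo)(void))
     {
             u32 hi = rd_hi();
             u32 lo = rd_lo();
             u32 hi_check = rd_hi();

             if (hi_check != hi) {
                     /* the low word wrapped between the two high reads */
                     lo = rd_lo();
                     hi = hi_check;
             }
             return ((u64)hi << 32) | lo;
     }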
4252
4253 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4254                                           uint32_t vmid,
4255                                           uint32_t gds_base, uint32_t gds_size,
4256                                           uint32_t gws_base, uint32_t gws_size,
4257                                           uint32_t oa_base, uint32_t oa_size)
4258 {
4259         struct amdgpu_device *adev = ring->adev;
4260
4261         /* GDS Base */
4262         gfx_v9_0_write_data_to_reg(ring, 0, false,
4263                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4264                                    gds_base);
4265
4266         /* GDS Size */
4267         gfx_v9_0_write_data_to_reg(ring, 0, false,
4268                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4269                                    gds_size);
4270
4271         /* GWS */
4272         gfx_v9_0_write_data_to_reg(ring, 0, false,
4273                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4274                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4275
4276         /* OA */
4277         gfx_v9_0_write_data_to_reg(ring, 0, false,
4278                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4279                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4280 }
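
     /* Worked example (sketch): the OA expression above builds a contiguous
      * mask of oa_size bits starting at bit oa_base, e.g. oa_base = 4 and
      * oa_size = 4 give (1 << 8) - (1 << 4) = 0x100 - 0x10 = 0xf0.
      */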
4281
4282 static const u32 vgpr_init_compute_shader[] =
4283 {
4284         0xb07c0000, 0xbe8000ff,
4285         0x000000f8, 0xbf110800,
4286         0x7e000280, 0x7e020280,
4287         0x7e040280, 0x7e060280,
4288         0x7e080280, 0x7e0a0280,
4289         0x7e0c0280, 0x7e0e0280,
4290         0x80808800, 0xbe803200,
4291         0xbf84fff5, 0xbf9c0000,
4292         0xd28c0001, 0x0001007f,
4293         0xd28d0001, 0x0002027e,
4294         0x10020288, 0xb8810904,
4295         0xb7814000, 0xd1196a01,
4296         0x00000301, 0xbe800087,
4297         0xbefc00c1, 0xd89c4000,
4298         0x00020201, 0xd89cc080,
4299         0x00040401, 0x320202ff,
4300         0x00000800, 0x80808100,
4301         0xbf84fff8, 0x7e020280,
4302         0xbf810000, 0x00000000,
4303 };
4304
4305 static const u32 sgpr_init_compute_shader[] =
4306 {
4307         0xb07c0000, 0xbe8000ff,
4308         0x0000005f, 0xbee50080,
4309         0xbe812c65, 0xbe822c65,
4310         0xbe832c65, 0xbe842c65,
4311         0xbe852c65, 0xb77c0005,
4312         0x80808500, 0xbf84fff8,
4313         0xbe800080, 0xbf810000,
4314 };
4315
4316 static const u32 vgpr_init_compute_shader_arcturus[] = {
4317         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4318         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4319         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4320         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4321         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4322         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4323         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4324         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4325         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4326         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4327         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4328         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4329         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4330         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4331         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4332         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4333         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4334         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4335         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4336         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4337         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4338         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4339         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4340         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4341         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4342         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4343         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4344         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4345         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4346         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4347         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4348         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4349         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4350         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4351         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4352         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4353         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4354         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4355         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4356         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4357         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4358         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4359         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4360         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4361         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4362         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4363         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4364         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4365         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4366         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4367         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4368         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4369         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4370         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4371         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4372         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4373         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4374         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4375         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4376         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4377         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4378         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4379         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4380         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4381         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4382         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4383         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4384         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4385         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4386         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4387         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4388         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4389         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4390         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4391         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4392         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4393         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4394         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4395         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4396         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4397         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4398         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4399         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4400         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4401         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4402         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4403         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4404         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4405         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4406         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4407         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4408         0xbf84fff8, 0xbf810000,
4409 };
4410
4411 /* When the register arrays below change, please update gpr_reg_size
4412    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4413    so that all gfx9 ASICs remain covered. */
4414 static const struct soc15_reg_entry vgpr_init_regs[] = {
4415    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4416    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4421    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4429 };
4430
4431 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4432    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4433    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4446 };
4447
4448 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4463 };
4464
4465 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4480 };
4481
4482 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4483    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4484    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4485    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4486    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4487    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4488    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4489    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4490    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4497    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4498    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4499    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4500    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4501    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4502    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4503    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4504    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4506    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4508    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4509    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4510    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4511    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4512    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4513    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4514    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4515    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4516 };
4517
4518 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4519 {
4520         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4521         int i, r;
4522
4523         /* only supported when RAS is enabled */
4524         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4525                 return 0;
4526
4527         r = amdgpu_ring_alloc(ring, 7);
4528         if (r) {
4529                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4530                         ring->name, r);
4531                 return r;
4532         }
4533
4534         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4535         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4536
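             /* Sketch of what the packet below does, following PM4 DMA_DATA
              * usage elsewhere in amdgpu: DST_SEL(1) targets GDS and
              * SRC_SEL(2) selects an immediate data fill (the zero dword
              * written as the src address), so the engine scrubs the whole
              * gds_size range with zeros to (re)initialize its ECC state.
              */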
4537         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4538         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4539                                 PACKET3_DMA_DATA_DST_SEL(1) |
4540                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4541                                 PACKET3_DMA_DATA_ENGINE(0)));
4542         amdgpu_ring_write(ring, 0);
4543         amdgpu_ring_write(ring, 0);
4544         amdgpu_ring_write(ring, 0);
4545         amdgpu_ring_write(ring, 0);
4546         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4547                                 adev->gds.gds_size);
4548
4549         amdgpu_ring_commit(ring);
4550
4551         for (i = 0; i < adev->usec_timeout; i++) {
4552                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4553                         break;
4554                 udelay(1);
4555         }
4556
4557         if (i >= adev->usec_timeout)
4558                 r = -ETIMEDOUT;
4559
4560         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4561
4562         return r;
4563 }
4564
4565 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4566 {
4567         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4568         struct amdgpu_ib ib;
4569         struct dma_fence *f = NULL;
4570         int r, i;
4571         unsigned total_size, vgpr_offset, sgpr_offset;
4572         u64 gpu_addr;
4573
4574         int compute_dim_x = adev->gfx.config.max_shader_engines *
4575                                                 adev->gfx.config.max_cu_per_sh *
4576                                                 adev->gfx.config.max_sh_per_se;
4577         int sgpr_work_group_size = 5;
4578         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4579         int vgpr_init_shader_size;
4580         const u32 *vgpr_init_shader_ptr;
4581         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4582
4583         /* only supported when RAS is enabled */
4584         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4585                 return 0;
4586
4587         /* bail if the compute ring is not ready */
4588         if (!ring->sched.ready)
4589                 return 0;
4590
4591         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4592                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4593                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4594                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4595         } else {
4596                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4597                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4598                 vgpr_init_regs_ptr = vgpr_init_regs;
4599         }
4600
4601         total_size =
4602                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4603         total_size +=
4604                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4605         total_size +=
4606                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4607         total_size = ALIGN(total_size, 256);
4608         vgpr_offset = total_size;
4609         total_size += ALIGN(vgpr_init_shader_size, 256);
4610         sgpr_offset = total_size;
4611         total_size += sizeof(sgpr_init_compute_shader);
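             /* Worked example (sketch, assuming 4 shader engines):
              * gpr_reg_size = 4 + 6 = 10, so each dispatch block above costs
              * (10 * 3 + 4 + 5 + 2) * 4 = 164 bytes; three blocks are 492
              * bytes, which ALIGN(total_size, 256) rounds up to 512, the
              * resulting vgpr_offset.
              */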
4612
4613         /* allocate an indirect buffer to put the commands in */
4614         memset(&ib, 0, sizeof(ib));
4615         r = amdgpu_ib_get(adev, NULL, total_size,
4616                                         AMDGPU_IB_POOL_DIRECT, &ib);
4617         if (r) {
4618                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4619                 return r;
4620         }
4621
4622         /* load the compute shaders */
4623         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4624                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4625
4626         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4627                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4628
4629         /* init the ib length to 0 */
4630         ib.length_dw = 0;
4631
4632         /* VGPR */
4633         /* write the register state for the compute dispatch */
4634         for (i = 0; i < gpr_reg_size; i++) {
4635                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4636                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4637                                                                 - PACKET3_SET_SH_REG_START;
4638                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4639         }
4640         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4641         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4642         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4643         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4644                                                         - PACKET3_SET_SH_REG_START;
4645         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4646         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4647
4648         /* write dispatch packet */
4649         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4650         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4651         ib.ptr[ib.length_dw++] = 1; /* y */
4652         ib.ptr[ib.length_dw++] = 1; /* z */
4653         ib.ptr[ib.length_dw++] =
4654                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4655
4656         /* write CS partial flush packet */
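             /* EVENT_TYPE(7) with EVENT_INDEX(4) encodes CS_PARTIAL_FLUSH,
              * which drains the outstanding compute work before continuing */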
4657         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4658         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4659
4660         /* SGPR1 */
4661         /* write the register state for the compute dispatch */
4662         for (i = 0; i < gpr_reg_size; i++) {
4663                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4664                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4665                                                                 - PACKET3_SET_SH_REG_START;
4666                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4667         }
4668         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4669         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4670         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4671         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4672                                                         - PACKET3_SET_SH_REG_START;
4673         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4674         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4675
4676         /* write dispatch packet */
4677         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4678         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4679         ib.ptr[ib.length_dw++] = 1; /* y */
4680         ib.ptr[ib.length_dw++] = 1; /* z */
4681         ib.ptr[ib.length_dw++] =
4682                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4683
4684         /* write CS partial flush packet */
4685         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4686         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4687
4688         /* SGPR2 */
4689         /* write the register state for the compute dispatch */
4690         for (i = 0; i < gpr_reg_size; i++) {
4691                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4692                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4693                                                                 - PACKET3_SET_SH_REG_START;
4694                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4695         }
4696         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4697         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4698         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4699         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4700                                                         - PACKET3_SET_SH_REG_START;
4701         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4702         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4703
4704         /* write dispatch packet */
4705         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4706         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4707         ib.ptr[ib.length_dw++] = 1; /* y */
4708         ib.ptr[ib.length_dw++] = 1; /* z */
4709         ib.ptr[ib.length_dw++] =
4710                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4711
4712         /* write CS partial flush packet */
4713         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4714         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4715
4716         /* schedule the IB on the ring */
4717         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4718         if (r) {
4719                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4720                 goto fail;
4721         }
4722
4723         /* wait for the GPU to finish processing the IB */
4724         r = dma_fence_wait(f, false);
4725         if (r) {
4726                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4727                 goto fail;
4728         }
4729
4730 fail:
4731         amdgpu_ib_free(adev, &ib, NULL);
4732         dma_fence_put(f);
4733
4734         return r;
4735 }
4736
4737 static int gfx_v9_0_early_init(void *handle)
4738 {
4739         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4740
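             /* GC 9.4.1 (Arcturus) and 9.4.2 (Aldebaran) are compute-only
              * parts and expose no gfx ring */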
4741         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4742             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4743                 adev->gfx.num_gfx_rings = 0;
4744         else
4745                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4746         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4747                                           AMDGPU_MAX_COMPUTE_RINGS);
4748         gfx_v9_0_set_kiq_pm4_funcs(adev);
4749         gfx_v9_0_set_ring_funcs(adev);
4750         gfx_v9_0_set_irq_funcs(adev);
4751         gfx_v9_0_set_gds_init(adev);
4752         gfx_v9_0_set_rlc_funcs(adev);
4753
4754         /* init rlcg reg access ctrl */
4755         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4756
4757         return 0;
4758 }
4759
4760 static int gfx_v9_0_ecc_late_init(void *handle)
4761 {
4762         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4763         int r;
4764
4765         /*
4766          * Temporary workaround: on several cards the CP firmware fails to
4767          * update the read pointer while CPDMA writes the GDS clearing
4768          * operation during the suspend/resume sequence, so limit this
4769          * operation to the cold-boot path.
4770          */
4771         if ((!adev->in_suspend) &&
4772             (adev->gds.gds_size)) {
4773                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4774                 if (r)
4775                         return r;
4776         }
4777
4778         /* requires IBs so do in late init after IB pool is initialized */
4779         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4780                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4781         else
4782                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4783
4784         if (r)
4785                 return r;
4786
4787         if (adev->gfx.ras &&
4788             adev->gfx.ras->enable_watchdog_timer)
4789                 adev->gfx.ras->enable_watchdog_timer(adev);
4790
4791         return 0;
4792 }
4793
4794 static int gfx_v9_0_late_init(void *handle)
4795 {
4796         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4797         int r;
4798
4799         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4800         if (r)
4801                 return r;
4802
4803         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4804         if (r)
4805                 return r;
4806
4807         r = gfx_v9_0_ecc_late_init(handle);
4808         if (r)
4809                 return r;
4810
4811         return 0;
4812 }
4813
4814 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4815 {
4816         uint32_t rlc_setting;
4817
4818         /* if RLC is not enabled, do nothing */
4819         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4820         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4821                 return false;
4822
4823         return true;
4824 }
4825
4826 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4827 {
4828         uint32_t data;
4829         unsigned i;
4830
4831         data = RLC_SAFE_MODE__CMD_MASK;
4832         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4833         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4834
4835         /* wait for RLC_SAFE_MODE */
4836         for (i = 0; i < adev->usec_timeout; i++) {
4837                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4838                         break;
4839                 udelay(1);
4840         }
4841 }
4842
4843 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4844 {
4845         uint32_t data;
4846
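             /* writing CMD with MESSAGE left at 0 asks the RLC to exit safe mode */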
4847         data = RLC_SAFE_MODE__CMD_MASK;
4848         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4849 }
4850
4851 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4852                                                 bool enable)
4853 {
4854         amdgpu_gfx_rlc_enter_safe_mode(adev);
4855
4856         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4857                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4858                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4859                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4860         } else {
4861                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4862                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4863                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4864         }
4865
4866         amdgpu_gfx_rlc_exit_safe_mode(adev);
4867 }
4868
4869 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4870                                                 bool enable)
4871 {
4872         /* TODO: double check if we need to perform under safe mode */
4873         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4874
4875         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4876                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4877         else
4878                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4879
4880         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4881                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4882         else
4883                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4884
4885         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4886 }
4887
4888 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4889                                                       bool enable)
4890 {
4891         uint32_t data, def;
4892
4893         amdgpu_gfx_rlc_enter_safe_mode(adev);
4894
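             /* In RLC_CGTT_MGCG_OVERRIDE a set bit forces the clocks on;
              * clearing a bit lets that clock-gating feature engage. */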
4895         /* It is disabled by HW by default */
4896         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4897                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4898                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4899
4900                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4901                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4902
4903                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4904                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4905                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4906
4907                 /* only for Vega10 & Raven1 */
4908                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4909
4910                 if (def != data)
4911                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4912
4913                 /* MGLS is a global flag to control all MGLS in GFX */
4914                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4915                         /* 2 - RLC memory Light sleep */
4916                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4917                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4918                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4919                                 if (def != data)
4920                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4921                         }
4922                         /* 3 - CP memory Light sleep */
4923                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4924                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4925                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4926                                 if (def != data)
4927                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4928                         }
4929                 }
4930         } else {
4931                 /* 1 - MGCG_OVERRIDE */
4932                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4933
4934                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4935                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4936
4937                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4938                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4939                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4940                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4941
4942                 if (def != data)
4943                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4944
4945                 /* 2 - disable MGLS in RLC */
4946                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4947                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4948                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4949                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4950                 }
4951
4952                 /* 3 - disable MGLS in CP */
4953                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4954                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4955                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4956                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4957                 }
4958         }
4959
4960         amdgpu_gfx_rlc_exit_safe_mode(adev);
4961 }
4962
4963 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4964                                            bool enable)
4965 {
4966         uint32_t data, def;
4967
4968         if (!adev->gfx.num_gfx_rings)
4969                 return;
4970
4971         amdgpu_gfx_rlc_enter_safe_mode(adev);
4972
4973         /* Enable 3D CGCG/CGLS */
4974         if (enable) {
4975                 /* write cmd to clear the cgcg/cgls override */
4976                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4977                 /* unset CGCG override */
4978                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4979                 /* update CGCG and CGLS override bits */
4980                 if (def != data)
4981                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4982
4983                 /* enable 3D CGCG FSM (0x0000363f) */
4984                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4985
4986                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4987                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4988                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4989                 else
4990                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4991
4992                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4993                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4994                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4995                 if (def != data)
4996                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4997
4998                 /* set IDLE_POLL_COUNT(0x00900100) */
4999                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5000                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5001                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5002                 if (def != data)
5003                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5004         } else {
5005                 /* Disable CGCG/CGLS */
5006                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5007                 /* disable cgcg; cgls must be disabled as well */
5008                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5009                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5010                 /* disable cgcg and cgls in FSM */
5011                 if (def != data)
5012                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5013         }
5014
5015         amdgpu_gfx_rlc_exit_safe_mode(adev);
5016 }
5017
5018 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5019                                                       bool enable)
5020 {
5021         uint32_t def, data;
5022
5023         amdgpu_gfx_rlc_enter_safe_mode(adev);
5024
5025         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5026                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5027                 /* unset CGCG override */
5028                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5029                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5030                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5031                 else
5032                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5033                 /* update CGCG and CGLS override bits */
5034                 if (def != data)
5035                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5036
5037                 /* enable CGCG FSM (0x0000363F) */
5038                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5039
5040                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5041                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5042                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5043                 else
5044                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5045                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5046                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5047                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5048                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5049                 if (def != data)
5050                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5051
5052                 /* set IDLE_POLL_COUNT(0x00900100) */
5053                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5054                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5055                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5056                 if (def != data)
5057                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5058         } else {
5059                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5060                 /* reset CGCG/CGLS bits */
5061                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5062                 /* disable cgcg and cgls in FSM */
5063                 if (def != data)
5064                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5065         }
5066
5067         amdgpu_gfx_rlc_exit_safe_mode(adev);
5068 }
5069
5070 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5071                                             bool enable)
5072 {
5073         if (enable) {
5074                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5075                  * ===  MGCG + MGLS ===
5076                  */
5077                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5078                 /* ===  CGCG /CGLS for GFX 3D Only === */
5079                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5080                 /* ===  CGCG + CGLS === */
5081                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5082         } else {
5083                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5084                  * ===  CGCG + CGLS ===
5085                  */
5086                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5087                 /* ===  CGCG /CGLS for GFX 3D Only === */
5088                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5089                 /* ===  MGCG + MGLS === */
5090                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5091         }
5092         return 0;
5093 }
5094
5095 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5096 {
5097         u32 reg, data;
5098
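             /* disable GFXOFF so the SPM register stays reachable over MMIO */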
5099         amdgpu_gfx_off_ctrl(adev, false);
5100
5101         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5102         if (amdgpu_sriov_is_pp_one_vf(adev))
5103                 data = RREG32_NO_KIQ(reg);
5104         else
5105                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5106
5107         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5108         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5109
5110         if (amdgpu_sriov_is_pp_one_vf(adev))
5111                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5112         else
5113                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5114
5115         amdgpu_gfx_off_ctrl(adev, true);
5116 }
5117
5118 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5119                                         uint32_t offset,
5120                                         struct soc15_reg_rlcg *entries, int arr_size)
5121 {
5122         int i;
5123         uint32_t reg;
5124
5125         if (!entries)
5126                 return false;
5127
5128         for (i = 0; i < arr_size; i++) {
5129                 const struct soc15_reg_rlcg *entry;
5130
5131                 entry = &entries[i];
5132                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5133                 if (offset == reg)
5134                         return true;
5135         }
5136
5137         return false;
5138 }
5139
5140 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5141 {
5142         return gfx_v9_0_check_rlcg_range(adev, offset,
5143                                         (void *)rlcg_access_gc_9_0,
5144                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5145 }
5146
5147 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5148         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5149         .set_safe_mode = gfx_v9_0_set_safe_mode,
5150         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5151         .init = gfx_v9_0_rlc_init,
5152         .get_csb_size = gfx_v9_0_get_csb_size,
5153         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5154         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5155         .resume = gfx_v9_0_rlc_resume,
5156         .stop = gfx_v9_0_rlc_stop,
5157         .reset = gfx_v9_0_rlc_reset,
5158         .start = gfx_v9_0_rlc_start,
5159         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5160         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5161 };
5162
5163 static int gfx_v9_0_set_powergating_state(void *handle,
5164                                           enum amd_powergating_state state)
5165 {
5166         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5167         bool enable = (state == AMD_PG_STATE_GATE);
5168
5169         switch (adev->ip_versions[GC_HWIP][0]) {
5170         case IP_VERSION(9, 2, 2):
5171         case IP_VERSION(9, 1, 0):
5172         case IP_VERSION(9, 3, 0):
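                     /* APUs: gfx power gating is coordinated with GFXOFF */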
5173                 if (!enable)
5174                         amdgpu_gfx_off_ctrl(adev, false);
5175
5176                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5177                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5178                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5179                 } else {
5180                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5181                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5182                 }
5183
5184                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5185                         gfx_v9_0_enable_cp_power_gating(adev, true);
5186                 else
5187                         gfx_v9_0_enable_cp_power_gating(adev, false);
5188
5189                 /* update gfx cgpg state */
5190                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5191
5192                 /* update gfx mg power gating state */
5193                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5194
5195                 if (enable)
5196                         amdgpu_gfx_off_ctrl(adev, true);
5197                 break;
5198         case IP_VERSION(9, 2, 1):
5199                 amdgpu_gfx_off_ctrl(adev, enable);
5200                 break;
5201         default:
5202                 break;
5203         }
5204
5205         return 0;
5206 }
5207
5208 static int gfx_v9_0_set_clockgating_state(void *handle,
5209                                           enum amd_clockgating_state state)
5210 {
5211         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5212
5213         if (amdgpu_sriov_vf(adev))
5214                 return 0;
5215
5216         switch (adev->ip_versions[GC_HWIP][0]) {
5217         case IP_VERSION(9, 0, 1):
5218         case IP_VERSION(9, 2, 1):
5219         case IP_VERSION(9, 4, 0):
5220         case IP_VERSION(9, 2, 2):
5221         case IP_VERSION(9, 1, 0):
5222         case IP_VERSION(9, 4, 1):
5223         case IP_VERSION(9, 3, 0):
5224         case IP_VERSION(9, 4, 2):
5225                 gfx_v9_0_update_gfx_clock_gating(adev,
5226                                                  state == AMD_CG_STATE_GATE);
5227                 break;
5228         default:
5229                 break;
5230         }
5231         return 0;
5232 }
5233
5234 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5235 {
5236         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5237         int data;
5238
5239         if (amdgpu_sriov_vf(adev))
5240                 *flags = 0;
5241
5242         /* AMD_CG_SUPPORT_GFX_MGCG */
5243         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5244         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5245                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5246
5247         /* AMD_CG_SUPPORT_GFX_CGCG */
5248         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5249         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5250                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5251
5252         /* AMD_CG_SUPPORT_GFX_CGLS */
5253         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5254                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5255
5256         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5257         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5258         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5259                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5260
5261         /* AMD_CG_SUPPORT_GFX_CP_LS */
5262         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5263         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5264                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5265
5266         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5267                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5268                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5269                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5270                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5271
5272                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5273                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5274                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5275         }
5276 }
5277
5278 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5279 {
5280         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5281 }
5282
5283 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5284 {
5285         struct amdgpu_device *adev = ring->adev;
5286         u64 wptr;
5287
5288         /* XXX check if swapping is necessary on BE */
5289         if (ring->use_doorbell) {
5290                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5291         } else {
5292                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5293                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5294         }
5295
5296         return wptr;
5297 }
5298
5299 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5300 {
5301         struct amdgpu_device *adev = ring->adev;
5302
5303         if (ring->use_doorbell) {
5304                 /* XXX check if swapping is necessary on BE */
5305                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5306                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5307         } else {
5308                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5309                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5310         }
5311 }
5312
5313 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5314 {
5315         struct amdgpu_device *adev = ring->adev;
5316         u32 ref_and_mask, reg_mem_engine;
5317         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5318
5319         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5320                 switch (ring->me) {
5321                 case 1:
5322                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5323                         break;
5324                 case 2:
5325                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5326                         break;
5327                 default:
5328                         return;
5329                 }
5330                 reg_mem_engine = 0;
5331         } else {
5332                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5333                 reg_mem_engine = 1; /* pfp */
5334         }
5335
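             /* operation 1 below writes ref_and_mask to the HDP flush request
              * register, then polls the done register for the matching bit */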
5336         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5337                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5338                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5339                               ref_and_mask, ref_and_mask, 0x20);
5340 }
5341
5342 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5343                                         struct amdgpu_job *job,
5344                                         struct amdgpu_ib *ib,
5345                                         uint32_t flags)
5346 {
5347         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5348         u32 header, control = 0;
5349
5350         if (ib->flags & AMDGPU_IB_FLAG_CE)
5351                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5352         else
5353                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5354
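             /* the IB control word packs the dword count in the low bits and
              * the VMID in bits 24-27 */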
5355         control |= ib->length_dw | (vmid << 24);
5356
5357         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5358                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5359
5360                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5361                         gfx_v9_0_ring_emit_de_meta(ring);
5362         }
5363
5364         amdgpu_ring_write(ring, header);
5365         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5366         amdgpu_ring_write(ring,
5367 #ifdef __BIG_ENDIAN
5368                 (2 << 0) |
5369 #endif
5370                 lower_32_bits(ib->gpu_addr));
5371         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5372         amdgpu_ring_write(ring, control);
5373 }
5374
5375 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5376                                           struct amdgpu_job *job,
5377                                           struct amdgpu_ib *ib,
5378                                           uint32_t flags)
5379 {
5380         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5381         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5382
5383         /* Currently there is a high probability of a wave ID mismatch
5384          * between ME and GDS, leading to a hw deadlock, because ME
5385          * generates different wave IDs than the GDS expects. This happens
5386          * randomly when at least 5 compute pipes use GDS ordered append.
5387          * The wave IDs generated by ME are also wrong after suspend/resume.
5388          * Those are probably bugs somewhere else in the kernel driver.
5389          *
5390          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5391          * GDS to 0 for this ring (me/pipe).
5392          */
5393         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5394                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5395                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5396                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5397         }
5398
5399         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5400         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5401         amdgpu_ring_write(ring,
5402 #ifdef __BIG_ENDIAN
5403                                 (2 << 0) |
5404 #endif
5405                                 lower_32_bits(ib->gpu_addr));
5406         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5407         amdgpu_ring_write(ring, control);
5408 }
5409
5410 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5411                                      u64 seq, unsigned flags)
5412 {
5413         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5414         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5415         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5416
5417         /* RELEASE_MEM - flush caches, send int */
5418         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5419         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5420                                                EOP_TC_NC_ACTION_EN) :
5421                                               (EOP_TCL1_ACTION_EN |
5422                                                EOP_TC_ACTION_EN |
5423                                                EOP_TC_WB_ACTION_EN |
5424                                                EOP_TC_MD_ACTION_EN)) |
5425                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5426                                  EVENT_INDEX(5)));
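             /* DATA_SEL: 1 = write the low 32 bits of seq, 2 = write all 64;
              * INT_SEL: 2 = also raise an interrupt once the data is written */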
5427         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5428
5429         /*
5430          * The address must be qword aligned for a 64-bit write and dword
5431          * aligned when only the low 32 bits are sent (data high discarded).
5432          */
5433         if (write64bit)
5434                 BUG_ON(addr & 0x7);
5435         else
5436                 BUG_ON(addr & 0x3);
5437         amdgpu_ring_write(ring, lower_32_bits(addr));
5438         amdgpu_ring_write(ring, upper_32_bits(addr));
5439         amdgpu_ring_write(ring, lower_32_bits(seq));
5440         amdgpu_ring_write(ring, upper_32_bits(seq));
5441         amdgpu_ring_write(ring, 0);
5442 }
5443
5444 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5445 {
5446         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5447         uint32_t seq = ring->fence_drv.sync_seq;
5448         uint64_t addr = ring->fence_drv.gpu_addr;
5449
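             /* block this ring until the fence writeback slot holds the most
              * recently synced sequence number */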
5450         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5451                               lower_32_bits(addr), upper_32_bits(addr),
5452                               seq, 0xffffffff, 4);
5453 }
5454
5455 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5456                                         unsigned vmid, uint64_t pd_addr)
5457 {
5458         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5459
5460         /* compute doesn't have PFP */
5461         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5462                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5463                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5464                 amdgpu_ring_write(ring, 0x0);
5465         }
5466 }
5467
5468 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5469 {
5470         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5471 }
5472
5473 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5474 {
5475         u64 wptr;
5476
5477         /* XXX check if swapping is necessary on BE */
5478         if (ring->use_doorbell)
5479                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5480         else
5481                 BUG();
5482         return wptr;
5483 }
5484
5485 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5486 {
5487         struct amdgpu_device *adev = ring->adev;
5488
5489         /* XXX check if swapping is necessary on BE */
5490         if (ring->use_doorbell) {
5491                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5492                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5493         } else {
5494                 BUG(); /* only DOORBELL method supported on gfx9 now */
5495         }
5496 }
5497
5498 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5499                                          u64 seq, unsigned int flags)
5500 {
5501         struct amdgpu_device *adev = ring->adev;
5502
5503         /* we only allocate 32 bits for each seq writeback address */
5504         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5505
5506         /* write fence seq to the "addr" */
5507         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5508         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5509                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5510         amdgpu_ring_write(ring, lower_32_bits(addr));
5511         amdgpu_ring_write(ring, upper_32_bits(addr));
5512         amdgpu_ring_write(ring, lower_32_bits(seq));
5513
5514         if (flags & AMDGPU_FENCE_FLAG_INT) {
5515                 /* set register to trigger INT */
5516                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5517                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5518                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5519                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5520                 amdgpu_ring_write(ring, 0);
5521                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5522         }
5523 }
5524
5525 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5526 {
5527         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5528         amdgpu_ring_write(ring, 0);
5529 }
5530
5531 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5532 {
5533         struct v9_ce_ib_state ce_payload = {0};
5534         uint64_t csa_addr;
5535         int cnt;
5536
5537         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
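             /* PM4 count above: 3 control/address dwords plus the payload,
              * minus one per the PACKET3 count convention, hence "+ 4 - 2" */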
5538         csa_addr = amdgpu_csa_vaddr(ring->adev);
5539
5540         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5541         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5542                                  WRITE_DATA_DST_SEL(8) |
5543                                  WR_CONFIRM) |
5544                                  WRITE_DATA_CACHE_POLICY(0));
5545         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5546         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5547         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5548 }
5549
5550 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5551 {
5552         struct v9_de_ib_state de_payload = {0};
5553         uint64_t csa_addr, gds_addr;
5554         int cnt;
5555
5556         csa_addr = amdgpu_csa_vaddr(ring->adev);
5557         gds_addr = csa_addr + 4096;
5558         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5559         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5560
5561         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5562         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5563         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5564                                  WRITE_DATA_DST_SEL(8) |
5565                                  WR_CONFIRM) |
5566                                  WRITE_DATA_CACHE_POLICY(0));
5567         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5568         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5569         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5570 }
5571
5572 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5573                                    bool secure)
5574 {
5575         uint32_t v = secure ? FRAME_TMZ : 0;
5576
5577         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5578         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5579 }
5580
5581 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5582 {
5583         uint32_t dw2 = 0;
5584
5585         if (amdgpu_sriov_vf(ring->adev))
5586                 gfx_v9_0_ring_emit_ce_meta(ring);
5587
5588         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5589         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5590                 /* set load_global_config & load_global_uconfig */
5591                 dw2 |= 0x8001;
5592                 /* set load_cs_sh_regs */
5593                 dw2 |= 0x01000000;
5594                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5595                 dw2 |= 0x10002;
5596
5597                 /* set load_ce_ram if a preamble is presented */
5598                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5599                         dw2 |= 0x10000000;
5600         } else {
5601                 /* still load_ce_ram if the preamble is presented for the
5602                  * first time, even though no context switch happens.
5603                  */
5604                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5605                         dw2 |= 0x10000000;
5606         }
5607
5608         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5609         amdgpu_ring_write(ring, dw2);
5610         amdgpu_ring_write(ring, 0);
5611 }
5612
5613 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5614 {
5615         unsigned ret;
5616         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5617         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5618         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5619         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5620         ret = ring->wptr & ring->buf_mask;
5621         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5622         return ret;
5623 }
5624
5625 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5626 {
5627         unsigned cur;
5628         BUG_ON(offset > ring->buf_mask);
5629         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5630
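             /* patch the COND_EXEC skip count; if the write pointer has
              * wrapped, the region to skip spans the end of the ring buffer */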
5631         cur = (ring->wptr & ring->buf_mask) - 1;
5632         if (likely(cur > offset))
5633                 ring->ring[offset] = cur - offset;
5634         else
5635                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5636 }
5637
5638 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5639                                     uint32_t reg_val_offs)
5640 {
5641         struct amdgpu_device *adev = ring->adev;
5642
5643         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5644         amdgpu_ring_write(ring, 0 |     /* src: register*/
5645                                 (5 << 8) |      /* dst: memory */
5646                                 (1 << 20));     /* write confirm */
5647         amdgpu_ring_write(ring, reg);
5648         amdgpu_ring_write(ring, 0);
5649         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5650                                 reg_val_offs * 4));
5651         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5652                                 reg_val_offs * 4));
5653 }
5654
5655 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5656                                     uint32_t val)
5657 {
5658         uint32_t cmd = 0;
5659
5660         switch (ring->funcs->type) {
5661         case AMDGPU_RING_TYPE_GFX:
5662                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5663                 break;
5664         case AMDGPU_RING_TYPE_KIQ:
5665                 cmd = (1 << 16); /* no inc addr */
5666                 break;
5667         default:
5668                 cmd = WR_CONFIRM;
5669                 break;
5670         }
5671         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5672         amdgpu_ring_write(ring, cmd);
5673         amdgpu_ring_write(ring, reg);
5674         amdgpu_ring_write(ring, 0);
5675         amdgpu_ring_write(ring, val);
5676 }
5677
5678 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5679                                         uint32_t val, uint32_t mask)
5680 {
5681         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5682 }
5683
5684 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5685                                                   uint32_t reg0, uint32_t reg1,
5686                                                   uint32_t ref, uint32_t mask)
5687 {
5688         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5689         struct amdgpu_device *adev = ring->adev;
5690         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5691                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5692
5693         if (fw_version_ok)
5694                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5695                                       ref, mask, 0x20);
5696         else
5697                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5698                                                            ref, mask);
5699 }
5700
5701 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5702 {
5703         struct amdgpu_device *adev = ring->adev;
5704         uint32_t value = 0;
5705
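             /* ask the SQ to kill the waves belonging to the hung VMID */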
5706         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5707         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5708         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5709         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5710         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5711 }
5712
5713 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5714                                                  enum amdgpu_interrupt_state state)
5715 {
5716         switch (state) {
5717         case AMDGPU_IRQ_STATE_DISABLE:
5718         case AMDGPU_IRQ_STATE_ENABLE:
5719                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5720                                TIME_STAMP_INT_ENABLE,
5721                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5722                 break;
5723         default:
5724                 break;
5725         }
5726 }
5727
5728 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5729                                                      int me, int pipe,
5730                                                      enum amdgpu_interrupt_state state)
5731 {
5732         u32 mec_int_cntl, mec_int_cntl_reg;
5733
5734         /*
5735          * amdgpu controls only the first MEC. That's why this function only
5736          * handles the setting of interrupts for this specific MEC. All other
5737          * pipes' interrupts are set by amdkfd.
5738          */
5739
5740         if (me == 1) {
5741                 switch (pipe) {
5742                 case 0:
5743                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5744                         break;
5745                 case 1:
5746                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5747                         break;
5748                 case 2:
5749                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5750                         break;
5751                 case 3:
5752                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5753                         break;
5754                 default:
5755                         DRM_DEBUG("invalid pipe %d\n", pipe);
5756                         return;
5757                 }
5758         } else {
5759                 DRM_DEBUG("invalid me %d\n", me);
5760                 return;
5761         }
5762
5763         switch (state) {
5764         case AMDGPU_IRQ_STATE_DISABLE:
5765                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5766                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5767                                              TIME_STAMP_INT_ENABLE, 0);
5768                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5769                 break;
5770         case AMDGPU_IRQ_STATE_ENABLE:
5771                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5772                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5773                                              TIME_STAMP_INT_ENABLE, 1);
5774                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5775                 break;
5776         default:
5777                 break;
5778         }
5779 }
5780
5781 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5782                                              struct amdgpu_irq_src *source,
5783                                              unsigned type,
5784                                              enum amdgpu_interrupt_state state)
5785 {
5786         switch (state) {
5787         case AMDGPU_IRQ_STATE_DISABLE:
5788         case AMDGPU_IRQ_STATE_ENABLE:
5789                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5790                                PRIV_REG_INT_ENABLE,
5791                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5792                 break;
5793         default:
5794                 break;
5795         }
5796
5797         return 0;
5798 }
5799
5800 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5801                                               struct amdgpu_irq_src *source,
5802                                               unsigned type,
5803                                               enum amdgpu_interrupt_state state)
5804 {
5805         switch (state) {
5806         case AMDGPU_IRQ_STATE_DISABLE:
5807         case AMDGPU_IRQ_STATE_ENABLE:
5808                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5809                                PRIV_INSTR_INT_ENABLE,
5810                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5811                 break;
5812         default:
5813                 break;
5814         }
5815
5816         return 0;
5817 }
5818
5819 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5820         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5821                         CP_ECC_ERROR_INT_ENABLE, 1)
5822
5823 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5824         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5825                         CP_ECC_ERROR_INT_ENABLE, 0)
5826
5827 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5828                                               struct amdgpu_irq_src *source,
5829                                               unsigned type,
5830                                               enum amdgpu_interrupt_state state)
5831 {
5832         switch (state) {
5833         case AMDGPU_IRQ_STATE_DISABLE:
5834                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5835                                 CP_ECC_ERROR_INT_ENABLE, 0);
5836                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5837                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5838                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5839                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5840                 break;
5841
5842         case AMDGPU_IRQ_STATE_ENABLE:
5843                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5844                                 CP_ECC_ERROR_INT_ENABLE, 1);
5845                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5846                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5847                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5848                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5849                 break;
5850         default:
5851                 break;
5852         }
5853
5854         return 0;
5855 }
5856
5857
5858 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5859                                             struct amdgpu_irq_src *src,
5860                                             unsigned type,
5861                                             enum amdgpu_interrupt_state state)
5862 {
5863         switch (type) {
5864         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5865                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5866                 break;
5867         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5868                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5869                 break;
5870         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5871                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5872                 break;
5873         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5874                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5875                 break;
5876         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5877                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5878                 break;
5879         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5880                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5881                 break;
5882         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5883                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5884                 break;
5885         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5886                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5887                 break;
5888         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5889                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5890                 break;
5891         default:
5892                 break;
5893         }
5894         return 0;
5895 }
5896
5897 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5898                             struct amdgpu_irq_src *source,
5899                             struct amdgpu_iv_entry *entry)
5900 {
5901         int i;
5902         u8 me_id, pipe_id, queue_id;
5903         struct amdgpu_ring *ring;
5904
5905         DRM_DEBUG("IH: CP EOP\n");
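             /* entry->ring_id packs pipe (bits 1:0), me (bits 3:2) and
              * queue (bits 6:4) */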
5906         me_id = (entry->ring_id & 0x0c) >> 2;
5907         pipe_id = (entry->ring_id & 0x03) >> 0;
5908         queue_id = (entry->ring_id & 0x70) >> 4;
5909
5910         switch (me_id) {
5911         case 0:
5912                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5913                 break;
5914         case 1:
5915         case 2:
5916                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5917                         ring = &adev->gfx.compute_ring[i];
5918                         /* Per-queue interrupt is supported for MEC starting from VI.
5919                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5920                          */
5921                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5922                                 amdgpu_fence_process(ring);
5923                 }
5924                 break;
5925         }
5926         return 0;
5927 }
5928
5929 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5930                            struct amdgpu_iv_entry *entry)
5931 {
5932         u8 me_id, pipe_id, queue_id;
5933         struct amdgpu_ring *ring;
5934         int i;
5935
5936         me_id = (entry->ring_id & 0x0c) >> 2;
5937         pipe_id = (entry->ring_id & 0x03) >> 0;
5938         queue_id = (entry->ring_id & 0x70) >> 4;
5939
5940         switch (me_id) {
5941         case 0:
5942                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5943                 break;
5944         case 1:
5945         case 2:
5946                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5947                         ring = &adev->gfx.compute_ring[i];
5948                         if (ring->me == me_id && ring->pipe == pipe_id &&
5949                             ring->queue == queue_id)
5950                                 drm_sched_fault(&ring->sched);
5951                 }
5952                 break;
5953         }
5954 }
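
/*
 * Illustrative sketch (hypothetical helper, not in the original file):
 * gfx_v9_0_eop_irq() and gfx_v9_0_fault() above decode entry->ring_id with
 * the same bitfields -- pipe in bits [1:0], me in [3:2], queue in [6:4] --
 * so the decode could be factored out:
 *
 *	static void gfx_v9_0_decode_ring_id(u32 ring_id, u8 *me, u8 *pipe,
 *					    u8 *queue)
 *	{
 *		*pipe = ring_id & 0x03;
 *		*me = (ring_id & 0x0c) >> 2;
 *		*queue = (ring_id & 0x70) >> 4;
 *	}
 *
 * For example, ring_id 0x25 decodes to queue 2, me 1, pipe 1.
 */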
5955
5956 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5957                                  struct amdgpu_irq_src *source,
5958                                  struct amdgpu_iv_entry *entry)
5959 {
5960         DRM_ERROR("Illegal register access in command stream\n");
5961         gfx_v9_0_fault(adev, entry);
5962         return 0;
5963 }
5964
5965 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5966                                   struct amdgpu_irq_src *source,
5967                                   struct amdgpu_iv_entry *entry)
5968 {
5969         DRM_ERROR("Illegal instruction in command stream\n");
5970         gfx_v9_0_fault(adev, entry);
5971         return 0;
5972 }
5973
5975 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5976         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5977           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5978           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5979         },
5980         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5981           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5982           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5983         },
5984         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5985           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5986           0, 0
5987         },
5988         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5989           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5990           0, 0
5991         },
5992         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5993           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5994           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5995         },
5996         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5997           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5998           0, 0
5999         },
6000         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6001           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6002           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6003         },
6004         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6005           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6006           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6007         },
6008         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6009           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6010           0, 0
6011         },
6012         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6013           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6014           0, 0
6015         },
6016         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6017           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6018           0, 0
6019         },
6020         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6021           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6022           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6023         },
6024         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6025           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6026           0, 0
6027         },
6028         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6029           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6030           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6031         },
6032         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6033           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6034           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6035           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6036         },
6037         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6038           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6039           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6040           0, 0
6041         },
6042         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6043           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6044           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6045           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6046         },
6047         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6048           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6049           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6050           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6051         },
6052         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6053           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6054           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6055           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6056         },
6057         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6058           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6059           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6060           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6061         },
6062         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6063           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6064           0, 0
6065         },
6066         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6067           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6068           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6069         },
6070         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6071           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6072           0, 0
6073         },
6074         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6075           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6076           0, 0
6077         },
6078         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6079           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6080           0, 0
6081         },
6082         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6083           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6084           0, 0
6085         },
6086         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6087           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6088           0, 0
6089         },
6090         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6091           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6092           0, 0
6093         },
6094         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6095           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6096           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6097         },
6098         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6099           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6100           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6101         },
6102         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6103           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6104           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6105         },
6106         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6107           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6108           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6109         },
6110         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6111           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6112           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6113         },
6114         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6115           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6116           0, 0
6117         },
6118         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6119           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6120           0, 0
6121         },
6122         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6123           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6124           0, 0
6125         },
6126         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6127           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6128           0, 0
6129         },
6130         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6131           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6132           0, 0
6133         },
6134         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6135           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6136           0, 0
6137         },
6138         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6139           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6140           0, 0
6141         },
6142         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6143           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6144           0, 0
6145         },
6146         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6147           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6148           0, 0
6149         },
6150         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6151           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6152           0, 0
6153         },
6154         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6155           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6156           0, 0
6157         },
6158         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6159           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6160           0, 0
6161         },
6162         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6163           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6164           0, 0
6165         },
6166         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6167           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6168           0, 0
6169         },
6170         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6171           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6172           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6173         },
6174         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6175           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6176           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6177         },
6178         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6179           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6180           0, 0
6181         },
6182         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6183           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6184           0, 0
6185         },
6186         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6187           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6188           0, 0
6189         },
6190         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6191           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6192           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6193         },
6194         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6195           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6196           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6197         },
6198         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6199           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6200           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6201         },
6202         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6203           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6204           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6205         },
6206         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6207           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6208           0, 0
6209         },
6210         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6211           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6212           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6213         },
6214         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6215           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6216           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6217         },
6218         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6219           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6220           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6221         },
6222         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6223           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6224           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6225         },
6226         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6227           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6228           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6229         },
6230         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6231           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6232           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6233         },
6234         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6235           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6236           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6237         },
6238         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6239           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6240           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6241         },
6242         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6243           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6244           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6245         },
6246         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6247           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6248           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6249         },
6250         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6251           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6252           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6253         },
6254         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6255           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6256           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6257         },
6258         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6259           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6260           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6261         },
6262         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6263           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6264           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6265         },
6266         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6267           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6268           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6269         },
6270         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6271           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6272           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6273         },
6274         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6275           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6276           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6277         },
6278         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6279           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6280           0, 0
6281         },
6282         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6283           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6284           0, 0
6285         },
6286         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6287           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6288           0, 0
6289         },
6290         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6291           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6292           0, 0
6293         },
6294         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6295           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6296           0, 0
6297         },
6298         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6299           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6300           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6301         },
6302         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6303           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6304           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6305         },
6306         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6307           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6308           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6309         },
6310         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6311           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6312           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6313         },
6314         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6315           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6316           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6317         },
6318         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6319           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6320           0, 0
6321         },
6322         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6323           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6324           0, 0
6325         },
6326         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6327           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6328           0, 0
6329         },
6330         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6331           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6332           0, 0
6333         },
6334         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6335           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6336           0, 0
6337         },
6338         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6339           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6340           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6341         },
6342         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6343           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6344           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6345         },
6346         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6347           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6348           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6349         },
6350         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6351           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6352           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6353         },
6354         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6355           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6356           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6357         },
6358         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6359           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6360           0, 0
6361         },
6362         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6363           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6364           0, 0
6365         },
6366         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6367           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6368           0, 0
6369         },
6370         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6371           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6372           0, 0
6373         },
6374         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6375           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6376           0, 0
6377         },
6378         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6379           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6380           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6381         },
6382         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6383           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6384           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6385         },
6386         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6387           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6388           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6389         },
6390         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6391           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6392           0, 0
6393         },
6394         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6395           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6396           0, 0
6397         },
6398         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6399           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6400           0, 0
6401         },
6402         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6403           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6404           0, 0
6405         },
6406         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6407           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6408           0, 0
6409         },
6410         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6411           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6412           0, 0
6413         }
6414 };
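
/*
 * Note on the table above: each entry pairs an EDC counter register with the
 * mask/shift of its SEC (single-error-corrected) field and, where the
 * hardware has one, its DED (double-error-detected) field; rows with a
 * "0, 0" pair have no separate DED field and report a single-error-detect
 * (SED) style count only. Extracting a count from a raw register value then
 * reduces to:
 *
 *	sec_cnt = (value & entry->sec_count_mask) >> entry->sec_count_shift;
 *
 * which is exactly what gfx_v9_0_ras_error_count() below does for every
 * matching row.
 */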
6415
6416 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6417                                      void *inject_if)
6418 {
6419         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6420         int ret;
6421         struct ta_ras_trigger_error_input block_info = { 0 };
6422
6423         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6424                 return -EINVAL;
6425
6426         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6427                 return -EINVAL;
6428
6429         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6430                 return -EPERM;
6431
6432         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6433               info->head.type)) {
6434                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6435                         ras_gfx_subblocks[info->head.sub_block_index].name,
6436                         info->head.type);
6437                 return -EPERM;
6438         }
6439
6440         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6441               info->head.type)) {
6442                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6443                         ras_gfx_subblocks[info->head.sub_block_index].name,
6444                         info->head.type);
6445                 return -EPERM;
6446         }
6447
6448         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6449         block_info.sub_block_index =
6450                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6451         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6452         block_info.address = info->address;
6453         block_info.value = info->value;
6454
6455         mutex_lock(&adev->grbm_idx_mutex);
6456         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6457         mutex_unlock(&adev->grbm_idx_mutex);
6458
6459         return ret;
6460 }
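
/*
 * Illustrative usage sketch (hypothetical values, not from the original
 * file): a caller injecting a correctable error into GFX sub-block 0 would
 * populate the request roughly as follows, with field names taken from the
 * struct ras_inject_if accesses above:
 *
 *	struct ras_inject_if info = {
 *		.head.block = AMDGPU_RAS_BLOCK__GFX,
 *		.head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
 *		.head.sub_block_index = 0,
 *		.address = 0,
 *		.value = 0,
 *	};
 *	int r = gfx_v9_0_ras_error_inject(adev, &info);
 */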
6461
6462 static const char *vml2_mems[] = {
6463         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6464         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6465         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6466         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6467         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6468         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6469         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6470         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6471         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6472         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6473         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6474         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6475         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6476         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6477         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6478         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6479 };
6480
6481 static const char *vml2_walker_mems[] = {
6482         "UTC_VML2_CACHE_PDE0_MEM0",
6483         "UTC_VML2_CACHE_PDE0_MEM1",
6484         "UTC_VML2_CACHE_PDE1_MEM0",
6485         "UTC_VML2_CACHE_PDE1_MEM1",
6486         "UTC_VML2_CACHE_PDE2_MEM0",
6487         "UTC_VML2_CACHE_PDE2_MEM1",
6488         "UTC_VML2_RDIF_LOG_FIFO",
6489 };
6490
6491 static const char *atc_l2_cache_2m_mems[] = {
6492         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6493         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6494         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6495         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6496 };
6497
6498 static const char *atc_l2_cache_4k_mems[] = {
6499         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6500         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6501         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6502         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6503         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6504         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6505         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6506         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6507         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6508         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6509         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6510         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6511         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6512         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6513         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6514         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6515         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6516         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6517         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6518         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6519         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6520         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6521         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6522         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6523         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6524         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6525         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6526         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6527         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6528         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6529         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6530         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6531 };
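
/*
 * The 32 names above enumerate 2 banks x 2 ways x 8 mems. Assuming the EDC
 * index register walks instances in the same order as this table, instance
 * i maps back to its components as:
 *
 *	bank = i / 16;
 *	way = (i / 8) % 2;
 *	mem = i % 8;
 *
 * e.g. i = 21 -> UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5.
 */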
6532
6533 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6534                                          struct ras_err_data *err_data)
6535 {
6536         uint32_t i, data;
6537         uint32_t sec_count, ded_count;
6538
6539         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6540         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6541         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6542         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6543         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6544         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6545         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6546         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6547
6548         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6549                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6550                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6551
6552                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6553                 if (sec_count) {
6554                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6555                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6556                         err_data->ce_count += sec_count;
6557                 }
6558
6559                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6560                 if (ded_count) {
6561                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6562                                 "DED %d\n", i, vml2_mems[i], ded_count);
6563                         err_data->ue_count += ded_count;
6564                 }
6565         }
6566
6567         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6568                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6569                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6570
6571                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6572                                                 SEC_COUNT);
6573                 if (sec_count) {
6574                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6575                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6576                         err_data->ce_count += sec_count;
6577                 }
6578
6579                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6580                                                 DED_COUNT);
6581                 if (ded_count) {
6582                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6583                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6584                         err_data->ue_count += ded_count;
6585                 }
6586         }
6587
6588         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6589                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6590                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6591
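                /* raw mask rather than a named field: the SEC count sits in
                 * bits [14:13] of the EDC_CNT value */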
6592                 sec_count = (data & 0x00006000L) >> 0xd;
6593                 if (sec_count) {
6594                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6595                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6596                                 sec_count);
6597                         err_data->ce_count += sec_count;
6598                 }
6599         }
6600
6601         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6602                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6603                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6604
6605                 sec_count = (data & 0x00006000L) >> 0xd;
6606                 if (sec_count) {
6607                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6608                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6609                                 sec_count);
6610                         err_data->ce_count += sec_count;
6611                 }
6612
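                /* the DED count sits in bits [16:15] of the same register */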
6613                 ded_count = (data & 0x00018000L) >> 0xf;
6614                 if (ded_count) {
6615                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6616                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6617                                 ded_count);
6618                         err_data->ue_count += ded_count;
6619                 }
6620         }
6621
6622         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6623         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6624         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6625         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6626
6627         return 0;
6628 }
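
/*
 * Access-pattern note: each memory above is read through an indexed-counter
 * pair -- write the instance number to the *_INDEX register, then read the
 * matching *_CNT register. Reading a single VML2 instance in isolation
 * would look like:
 *
 *	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 3);
 *	data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
 *	sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
 *
 * The writes of 255 on entry and exit presumably park the selector off any
 * real instance, and the explicit zero writes to the *_CNT registers drop
 * counts accumulated before the query.
 */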
6629
6630 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6631         const struct soc15_reg_entry *reg,
6632         uint32_t se_id, uint32_t inst_id, uint32_t value,
6633         uint32_t *sec_count, uint32_t *ded_count)
6634 {
6635         uint32_t i;
6636         uint32_t sec_cnt, ded_cnt;
6637
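        /* A single EDC register (e.g. mmTCC_EDC_CNT) backs many table rows,
         * so scan the whole table instead of stopping at the first match.
         */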
6638         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6639                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6640                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6641                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6642                         continue;
6643
6644                 sec_cnt = (value &
6645                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6646                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6647                 if (sec_cnt) {
6648                         dev_info(adev->dev, "GFX SubBlock %s, "
6649                                 "Instance[%d][%d], SEC %d\n",
6650                                 gfx_v9_0_ras_fields[i].name,
6651                                 se_id, inst_id,
6652                                 sec_cnt);
6653                         *sec_count += sec_cnt;
6654                 }
6655
6656                 ded_cnt = (value &
6657                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6658                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6659                 if (ded_cnt) {
6660                         dev_info(adev->dev, "GFX SubBlock %s, "
6661                                 "Instance[%d][%d], DED %d\n",
6662                                 gfx_v9_0_ras_fields[i].name,
6663                                 se_id, inst_id,
6664                                 ded_cnt);
6665                         *ded_count += ded_cnt;
6666                 }
6667         }
6668
6669         return 0;
6670 }
6671
6672 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6673 {
6674         int i, j, k;
6675
6676         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6677                 return;
6678
6679         /* read back registers to clear the counters */
6680         mutex_lock(&adev->grbm_idx_mutex);
6681         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6682                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6683                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6684                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6685                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6686                         }
6687                 }
6688         }
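        /* 0xe0000000 presumably sets the SE/SH/INSTANCE broadcast-writes
         * bits, restoring GRBM_GFX_INDEX to broadcast mode after the
         * per-instance walk. */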
6689         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6690         mutex_unlock(&adev->grbm_idx_mutex);
6691
6692         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6693         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6694         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6695         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6696         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6697         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6698         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6699         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6700
6701         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6702                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6703                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6704         }
6705
6706         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6707                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6708                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6709         }
6710
6711         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6712                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6713                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6714         }
6715
6716         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6717                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6718                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6719         }
6720
6721         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6722         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6723         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6724         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6725 }
6726
6727 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6728                                           void *ras_error_status)
6729 {
6730         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6731         uint32_t sec_count = 0, ded_count = 0;
6732         uint32_t i, j, k;
6733         uint32_t reg_value;
6734
6735         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6736                 return;
6737
6738         err_data->ue_count = 0;
6739         err_data->ce_count = 0;
6740
6741         mutex_lock(&adev->grbm_idx_mutex);
6742
6743         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6744                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6745                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6746                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6747                                 reg_value =
6748                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6749                                 if (reg_value)
6750                                         gfx_v9_0_ras_error_count(adev,
6751                                                 &gfx_v9_0_edc_counter_regs[i],
6752                                                 j, k, reg_value,
6753                                                 &sec_count, &ded_count);
6754                         }
6755                 }
6756         }
6757
6758         err_data->ce_count += sec_count;
6759         err_data->ue_count += ded_count;
6760
6761         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6762         mutex_unlock(&adev->grbm_idx_mutex);
6763
6764         gfx_v9_0_query_utc_edc_status(adev, err_data);
6765 }
6766
6767 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6768 {
6769         const unsigned int cp_coher_cntl =
6770                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6771                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6772                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6773                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6774                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6775
6776         /* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
6777         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6778         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6779         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6780         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6781         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6782         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6783         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6784 }
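
/*
 * Note: the ACQUIRE_MEM above is a full-range operation. With
 * CP_COHER_BASE = 0 and CP_COHER_SIZE/CP_COHER_SIZE_HI saturated, the
 * (base, size) window spans the entire address space, so every cache line
 * selected by cp_coher_cntl is flushed/invalidated; POLL_INTERVAL (0x0A)
 * is presumably how often the CP re-polls for completion.
 */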
6785
6786 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6787                                         uint32_t pipe, bool enable)
6788 {
6789         struct amdgpu_device *adev = ring->adev;
6790         uint32_t val;
6791         uint32_t wcl_cs_reg;
6792
6793         /* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6794         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6795
6796         switch (pipe) {
6797         case 0:
6798                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6799                 break;
6800         case 1:
6801                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6802                 break;
6803         case 2:
6804                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6805                 break;
6806         case 3:
6807                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6808                 break;
6809         default:
6810                 DRM_DEBUG("invalid pipe %d\n", pipe);
6811                 return;
6812         }
6813
6814         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6815 }
6816
6817 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6818 {
6819         struct amdgpu_device *adev = ring->adev;
6820         uint32_t val;
6821         int i;
6822
6823         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
6824          * limits the number of gfx waves. Writing 0x1f (5 bits set) ensures
6825          * that gfx gets only around 25% of the GPU's resources.
6826          */
6828         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6829         amdgpu_ring_emit_wreg(ring,
6830                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6831                               val);
6832
6833         /* Restrict waves for normal/low priority compute queues as well
6834          * to get the best QoS for high priority compute jobs.
6835          *
6836          * amdgpu controls only the first ME (CS pipes 0-3).
6837          */
6838         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6839                 if (i != ring->pipe)
6840                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6841         }
6843 }
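
/*
 * Worked example for the 25% figure above: a 7-bit multiplier has a
 * full-scale value of 0x7f (127). Writing 0x1f (31) therefore limits gfx
 * to 31/127, i.e. roughly 24.4% of the wave slots, while the *_DEFAULT
 * value restores full throughput.
 */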
6844
6845 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6846         .name = "gfx_v9_0",
6847         .early_init = gfx_v9_0_early_init,
6848         .late_init = gfx_v9_0_late_init,
6849         .sw_init = gfx_v9_0_sw_init,
6850         .sw_fini = gfx_v9_0_sw_fini,
6851         .hw_init = gfx_v9_0_hw_init,
6852         .hw_fini = gfx_v9_0_hw_fini,
6853         .suspend = gfx_v9_0_suspend,
6854         .resume = gfx_v9_0_resume,
6855         .is_idle = gfx_v9_0_is_idle,
6856         .wait_for_idle = gfx_v9_0_wait_for_idle,
6857         .soft_reset = gfx_v9_0_soft_reset,
6858         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6859         .set_powergating_state = gfx_v9_0_set_powergating_state,
6860         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6861 };
6862
6863 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6864         .type = AMDGPU_RING_TYPE_GFX,
6865         .align_mask = 0xff,
6866         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6867         .support_64bit_ptrs = true,
6868         .secure_submission_supported = true,
6869         .vmhub = AMDGPU_GFXHUB_0,
6870         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6871         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6872         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6873         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6874                 5 +  /* COND_EXEC */
6875                 7 +  /* PIPELINE_SYNC */
6876                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6877                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6878                 2 + /* VM_FLUSH */
6879                 8 +  /* FENCE for VM_FLUSH */
6880                 20 + /* GDS switch */
6881                 4 + /* double SWITCH_BUFFER;
6882                        the first COND_EXEC jumps to the place just
6883                        prior to this double SWITCH_BUFFER */
6884                 5 + /* COND_EXEC */
6885                 7 + /* HDP_flush */
6886                 4 + /* VGT_flush */
6887                 14 + /* CE_META */
6888                 31 + /* DE_META */
6889                 3 + /* CNTX_CTRL */
6890                 5 + /* HDP_INVL */
6891                 8 + 8 + /* FENCE x2 */
6892                 2 + /* SWITCH_BUFFER */
6893                 7, /* gfx_v9_0_emit_mem_sync */
6894         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6895         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6896         .emit_fence = gfx_v9_0_ring_emit_fence,
6897         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6898         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6899         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6900         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6901         .test_ring = gfx_v9_0_ring_test_ring,
6902         .test_ib = gfx_v9_0_ring_test_ib,
6903         .insert_nop = amdgpu_ring_insert_nop,
6904         .pad_ib = amdgpu_ring_generic_pad_ib,
6905         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6906         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6907         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6908         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6909         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6910         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6911         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6912         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6913         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6914         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6915 };
6916
6917 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6918         .type = AMDGPU_RING_TYPE_COMPUTE,
6919         .align_mask = 0xff,
6920         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6921         .support_64bit_ptrs = true,
6922         .vmhub = AMDGPU_GFXHUB_0,
6923         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6924         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6925         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6926         .emit_frame_size =
6927                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6928                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6929                 5 + /* hdp invalidate */
6930                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6931                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6932                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6933                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6934                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6935                 7 + /* gfx_v9_0_emit_mem_sync */
6936                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6937                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6938         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6939         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6940         .emit_fence = gfx_v9_0_ring_emit_fence,
6941         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6942         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6943         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6944         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6945         .test_ring = gfx_v9_0_ring_test_ring,
6946         .test_ib = gfx_v9_0_ring_test_ib,
6947         .insert_nop = amdgpu_ring_insert_nop,
6948         .pad_ib = amdgpu_ring_generic_pad_ib,
6949         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6950         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6951         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6952         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6953         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
6954 };
6955
6956 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6957         .type = AMDGPU_RING_TYPE_KIQ,
6958         .align_mask = 0xff,
6959         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6960         .support_64bit_ptrs = true,
6961         .vmhub = AMDGPU_GFXHUB_0,
6962         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6963         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6964         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6965         .emit_frame_size =
6966                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6967                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6968                 5 + /* hdp invalidate */
6969                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6970                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6971                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6972                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6973                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6974         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6975         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6976         .test_ring = gfx_v9_0_ring_test_ring,
6977         .insert_nop = amdgpu_ring_insert_nop,
6978         .pad_ib = amdgpu_ring_generic_pad_ib,
6979         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6980         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6981         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6982         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6983 };
6984
6985 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6986 {
6987         int i;
6988
6989         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6990
6991         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6992                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6993
6994         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6995                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6996 }
6997
6998 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6999         .set = gfx_v9_0_set_eop_interrupt_state,
7000         .process = gfx_v9_0_eop_irq,
7001 };
7002
7003 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7004         .set = gfx_v9_0_set_priv_reg_fault_state,
7005         .process = gfx_v9_0_priv_reg_irq,
7006 };
7007
7008 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7009         .set = gfx_v9_0_set_priv_inst_fault_state,
7010         .process = gfx_v9_0_priv_inst_irq,
7011 };
7012
7013 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7014         .set = gfx_v9_0_set_cp_ecc_error_state,
7015         .process = amdgpu_gfx_cp_ecc_error_irq,
7016 };
7017
7019 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7020 {
7021         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7022         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7023
7024         adev->gfx.priv_reg_irq.num_types = 1;
7025         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7026
7027         adev->gfx.priv_inst_irq.num_types = 1;
7028         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7029
7030         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7031         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7032 }
7033
7034 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7035 {
7036         switch (adev->ip_versions[GC_HWIP][0]) {
7037         case IP_VERSION(9, 0, 1):
7038         case IP_VERSION(9, 2, 1):
7039         case IP_VERSION(9, 4, 0):
7040         case IP_VERSION(9, 2, 2):
7041         case IP_VERSION(9, 1, 0):
7042         case IP_VERSION(9, 4, 1):
7043         case IP_VERSION(9, 3, 0):
7044         case IP_VERSION(9, 4, 2):
7045                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7046                 break;
7047         default:
7048                 break;
7049         }
7050 }
7051
7052 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7053 {
7054         /* init ASIC GDS info */
7055         switch (adev->ip_versions[GC_HWIP][0]) {
7056         case IP_VERSION(9, 0, 1):
7057         case IP_VERSION(9, 2, 1):
7058         case IP_VERSION(9, 4, 0):
7059                 adev->gds.gds_size = 0x10000;
7060                 break;
7061         case IP_VERSION(9, 2, 2):
7062         case IP_VERSION(9, 1, 0):
7063         case IP_VERSION(9, 4, 1):
7064                 adev->gds.gds_size = 0x1000;
7065                 break;
7066         case IP_VERSION(9, 4, 2):
7067                 /* Aldebaran removed all GDS internal memory; only GWS
7068                  * opcodes (e.g. barrier, semaphore) are supported in
7069                  * the kernel. */
7070                 adev->gds.gds_size = 0;
7071                 break;
7072         default:
7073                 adev->gds.gds_size = 0x10000;
7074                 break;
7075         }
7076
7077         switch (adev->ip_versions[GC_HWIP][0]) {
7078         case IP_VERSION(9, 0, 1):
7079         case IP_VERSION(9, 4, 0):
7080                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7081                 break;
7082         case IP_VERSION(9, 2, 1):
7083                 adev->gds.gds_compute_max_wave_id = 0x27f;
7084                 break;
7085         case IP_VERSION(9, 2, 2):
7086         case IP_VERSION(9, 1, 0):
7087                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7088                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7089                 else
7090                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7091                 break;
7092         case IP_VERSION(9, 4, 1):
7093                 adev->gds.gds_compute_max_wave_id = 0xfff;
7094                 break;
7095         case IP_VERSION(9, 4, 2):
7096                 /* deprecated for Aldebaran, no usage at all */
7097                 adev->gds.gds_compute_max_wave_id = 0;
7098                 break;
7099         default:
7100                 /* this really depends on the chip */
7101                 adev->gds.gds_compute_max_wave_id = 0x7ff;
7102                 break;
7103         }
7104
7105         adev->gds.gws_size = 64;
7106         adev->gds.oa_size = 16;
7107 }
7108
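/* Descriptive note for the helper below: it marks the caller-specified CUs
 * inactive for the currently selected SE/SH by programming the INACTIVE_CUS
 * field of GC_USER_SHADER_ARRAY_CONFIG; a zero bitmap returns early and
 * leaves the register untouched.
 */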
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
                                                 u32 bitmap)
{
        u32 data;

        if (!bitmap)
                return;

        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

        WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

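/* Descriptive note for the helper below: the active-CU mask for the
 * currently selected SE/SH is derived by OR-ing the fused-off CUs
 * (CC_GC_SHADER_ARRAY_CONFIG) with the user-disabled CUs
 * (GC_USER_SHADER_ARRAY_CONFIG), then inverting the result and clamping
 * it to max_cu_per_sh bits.
 */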
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
        u32 data, mask;

        data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
        data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

        return (~data) & mask;
}

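/* Descriptive note for the function below: it walks every SE/SH, applies
 * the per-SH user disable masks, and fills cu_info with the per-SH active
 * CU bitmaps, the total active CU count and the legacy 32-bit always-on
 * (AO) CU mask.
 */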
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        unsigned disable_masks[4 * 4];

        if (!adev || !cu_info)
                return -EINVAL;

        /*
         * 16 comes from the 4 * 4 bitmap array size, which covers all
         * gfx9 ASICs.
         */
        if (adev->gfx.config.max_shader_engines *
                adev->gfx.config.max_sh_per_se > 16)
                return -EINVAL;

        amdgpu_gfx_parse_disable_cu(disable_masks,
                                    adev->gfx.config.max_shader_engines,
                                    adev->gfx.config.max_sh_per_se);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
                        gfx_v9_0_set_user_cu_inactive_bitmap(
                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
                        bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

                        /*
                         * The bitmap (and ao_cu_bitmap) in the cu_info
                         * structure is a 4x4 array, which suits Vega
                         * ASICs and their 4 * 2 SE/SH layout.
                         * Arcturus, however, changed the SE/SH layout to
                         * 8 * 1.  To minimize the impact, we fold it into
                         * the existing bitmap array as follows:
                         *    SE4,SH0 --> bitmap[0][1]
                         *    SE5,SH0 --> bitmap[1][1]
                         *    SE6,SH0 --> bitmap[2][1]
                         *    SE7,SH0 --> bitmap[3][1]
                         */
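                        /* Worked example of the fold above: SE5/SH0 on
                         * Arcturus (i = 5, j = 0) lands in
                         * bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1],
                         * matching the table.
                         */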
                        cu_info->bitmap[i % 4][j + i / 4] = bitmap;

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (bitmap & mask) {
                                        if (counter < adev->gfx.config.max_cu_per_sh)
                                                ao_bitmap |= mask;
                                        counter++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* the legacy 32-bit AO CU mask only covers SE0/SE1 and SH0/SH1 */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
                }
        }
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;

        return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 9,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v9_0_ip_funcs,
};
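
/* Note: this IP block version is registered with the device (via
 * amdgpu_device_ip_block_add()) by the SoC/IP-discovery code rather than
 * from this file.
 */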