/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_1_ARCT                 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_2_ARCT                 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_3_ARCT                 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_4_ARCT                 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_5_ARCT                 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX        0

enum ta_ras_gfx_subblock {
        /* CPC */
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

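/*
 * AMDGPU_RAS_SUB_BLOCK() packs two per-subblock bitmasks: (a..d) encode the
 * error types the hardware reports for this memory, (e..h) the error types
 * the software stack supports for it.  The bit positions are believed to
 * follow enum amdgpu_ras_error_type (parity, single-correctable,
 * multi-uncorrectable, poison); note that the software mask is packed in a
 * different bit order than the hardware mask.
 */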
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

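/*
 * "Golden" register tables: (mask, value) pairs consumed by
 * soc15_program_register_sequence() at init time.  Each entry amounts to a
 * read-modify-write in which only the bits covered by the mask are replaced
 * with the recommended power-on value for that ASIC revision.
 */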
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

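/* Registers that appear to require the RLCG write path (see
 * gfx_v9_0_rlcg_wreg() below) rather than direct MMIO when virtualized. */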
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

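/*
 * Offsets of each RLC_SRM_INDEX_CNTL_ADDR/DATA instance relative to the
 * first one.  The deltas are spelled out because the register instances
 * are not guaranteed to be contiguous in the register map.
 */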
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

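/*
 * RLC-gated register write.  Under SR-IOV the guest cannot write certain
 * registers directly, so the value and offset are posted in SCRATCH_REG0/1
 * and RLC_SPARE_INT is rung to ask the RLC firmware to perform the write.
 * Bit 31 of scratch_reg1 serves as a busy flag that the RLC clears once
 * the write has been committed; the loop below polls for that.
 * GRBM_GFX_CNTL and GRBM_GFX_INDEX are special-cased: they are mirrored
 * into scratch_reg2/3 and then written through plain MMIO.
 */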
void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
        static void *scratch_reg0;
        static void *scratch_reg1;
        static void *scratch_reg2;
        static void *scratch_reg3;
        static void *spare_int;
        static uint32_t grbm_cntl;
        static uint32_t grbm_idx;

        /* All SCRATCH_REGx instances live in the same register segment,
         * hence the reuse of mmSCRATCH_REG1_BASE_IDX for reg2/reg3. */
        scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
        scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
        scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
        scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
        spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

        grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
        grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

        if (amdgpu_sriov_runtime(adev)) {
                pr_err("shouldn't call rlcg write register during runtime\n");
                return;
        }

        if (offset == grbm_cntl || offset == grbm_idx) {
                if (offset == grbm_cntl)
                        writel(v, scratch_reg2);
                else if (offset == grbm_idx)
                        writel(v, scratch_reg3);

                writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
        } else {
                uint32_t i = 0;
                uint32_t retries = 50000;

                writel(v, scratch_reg0);
                writel(offset | 0x80000000, scratch_reg1);
                writel(1, spare_int);
                for (i = 0; i < retries; i++) {
                        u32 tmp;

                        tmp = readl(scratch_reg1);
                        if (!(tmp & 0x80000000))
                                break;

                        udelay(10);
                }
                if (i >= retries)
                        pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
        }
}

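/* Expected GB_ADDR_CONFIG value per ASIC; used later in this file (outside
 * this excerpt) when initializing the gfx addressing configuration. */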
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base: 0, gds heap size: 0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* doorbell offset and engine select */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

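/*
 * The *_size fields below are the total dword counts (PACKET3 header plus
 * payload) of the packets emitted above; the KIQ layer uses them to
 * reserve ring space before calling the corresponding emit function.
 */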
909 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
910         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
911         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
912         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
913         .kiq_query_status = gfx_v9_0_kiq_query_status,
914         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
915         .set_resources_size = 8,
916         .map_queues_size = 7,
917         .unmap_queues_size = 6,
918         .query_status_size = 7,
919         .invalidate_tlbs_size = 2,
920 };
921
922 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
923 {
924         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
925 }
926
927 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
928 {
929         switch (adev->asic_type) {
930         case CHIP_VEGA10:
931                 soc15_program_register_sequence(adev,
932                                                 golden_settings_gc_9_0,
933                                                 ARRAY_SIZE(golden_settings_gc_9_0));
934                 soc15_program_register_sequence(adev,
935                                                 golden_settings_gc_9_0_vg10,
936                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
937                 break;
938         case CHIP_VEGA12:
939                 soc15_program_register_sequence(adev,
940                                                 golden_settings_gc_9_2_1,
941                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
942                 soc15_program_register_sequence(adev,
943                                                 golden_settings_gc_9_2_1_vg12,
944                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
945                 break;
946         case CHIP_VEGA20:
947                 soc15_program_register_sequence(adev,
948                                                 golden_settings_gc_9_0,
949                                                 ARRAY_SIZE(golden_settings_gc_9_0));
950                 soc15_program_register_sequence(adev,
951                                                 golden_settings_gc_9_0_vg20,
952                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
953                 break;
954         case CHIP_ARCTURUS:
955                 soc15_program_register_sequence(adev,
956                                                 golden_settings_gc_9_4_1_arct,
957                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
958                 break;
959         case CHIP_RAVEN:
960                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
961                                                 ARRAY_SIZE(golden_settings_gc_9_1));
962                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
963                         soc15_program_register_sequence(adev,
964                                                         golden_settings_gc_9_1_rv2,
965                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
966                 else
967                         soc15_program_register_sequence(adev,
968                                                         golden_settings_gc_9_1_rv1,
969                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
970                 break;
971          case CHIP_RENOIR:
972                 soc15_program_register_sequence(adev,
973                                                 golden_settings_gc_9_1_rn,
974                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
975                 return; /* for renoir, don't need common goldensetting */
976         default:
977                 break;
978         }
979
980         if (adev->asic_type != CHIP_ARCTURUS)
981                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
982                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
983 }
984
985 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
986 {
987         adev->gfx.scratch.num_reg = 8;
988         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
989         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
990 }
991
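     /*
      * Emit a PACKET3_WRITE_DATA that writes @val to register @reg from
      * the engine selected by @eng_sel; @wc sets WR_CONFIRM so the CP
      * waits for the write to land before processing further packets.
      */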
992 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
993                                        bool wc, uint32_t reg, uint32_t val)
994 {
995         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
996         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
997                                 WRITE_DATA_DST_SEL(0) |
998                                 (wc ? WR_CONFIRM : 0));
999         amdgpu_ring_write(ring, reg);
1000         amdgpu_ring_write(ring, 0);
1001         amdgpu_ring_write(ring, val);
1002 }
1003
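     /*
      * Emit a PACKET3_WAIT_REG_MEM that polls a register or a 64-bit
      * memory location (@mem_space selects which) until
      * (value & @mask) == @ref (function 3 == equal); @inv is the poll
      * interval, and memory addresses must be dword aligned.
      */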
1004 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1005                                   int mem_space, int opt, uint32_t addr0,
1006                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1007                                   uint32_t inv)
1008 {
1009         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1010         amdgpu_ring_write(ring,
1011                                  /* memory (1) or register (0) */
1012                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1013                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1014                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1015                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1016
1017         if (mem_space)
1018                 BUG_ON(addr0 & 0x3); /* Dword align */
1019         amdgpu_ring_write(ring, addr0);
1020         amdgpu_ring_write(ring, addr1);
1021         amdgpu_ring_write(ring, ref);
1022         amdgpu_ring_write(ring, mask);
1023         amdgpu_ring_write(ring, inv); /* poll interval */
1024 }
1025
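     /*
      * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD,
      * submit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and
      * poll, up to usec_timeout, until the new value shows up.
      */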
1026 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1027 {
1028         struct amdgpu_device *adev = ring->adev;
1029         uint32_t scratch;
1030         uint32_t tmp = 0;
1031         unsigned i;
1032         int r;
1033
1034         r = amdgpu_gfx_scratch_get(adev, &scratch);
1035         if (r)
1036                 return r;
1037
1038         WREG32(scratch, 0xCAFEDEAD);
1039         r = amdgpu_ring_alloc(ring, 3);
1040         if (r)
1041                 goto error_free_scratch;
1042
1043         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1044         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1045         amdgpu_ring_write(ring, 0xDEADBEEF);
1046         amdgpu_ring_commit(ring);
1047
1048         for (i = 0; i < adev->usec_timeout; i++) {
1049                 tmp = RREG32(scratch);
1050                 if (tmp == 0xDEADBEEF)
1051                         break;
1052                 udelay(1);
1053         }
1054
1055         if (i >= adev->usec_timeout)
1056                 r = -ETIMEDOUT;
1057
1058 error_free_scratch:
1059         amdgpu_gfx_scratch_free(adev, scratch);
1060         return r;
1061 }
1062
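     /*
      * IB test: build a tiny indirect buffer whose single WRITE_DATA
      * packet (DST_SEL 5, i.e. memory) stores 0xDEADBEEF in a writeback
      * slot, schedule it, and wait on its fence before checking the slot.
      */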
1063 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1064 {
1065         struct amdgpu_device *adev = ring->adev;
1066         struct amdgpu_ib ib;
1067         struct dma_fence *f = NULL;
1068
1069         unsigned index;
1070         uint64_t gpu_addr;
1071         uint32_t tmp;
1072         long r;
1073
1074         r = amdgpu_device_wb_get(adev, &index);
1075         if (r)
1076                 return r;
1077
1078         gpu_addr = adev->wb.gpu_addr + (index * 4);
1079         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1080         memset(&ib, 0, sizeof(ib));
1081         r = amdgpu_ib_get(adev, NULL, 16,
1082                           AMDGPU_IB_POOL_DIRECT, &ib);
1083         if (r)
1084                 goto err1;
1085
1086         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1087         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1088         ib.ptr[2] = lower_32_bits(gpu_addr);
1089         ib.ptr[3] = upper_32_bits(gpu_addr);
1090         ib.ptr[4] = 0xDEADBEEF;
1091         ib.length_dw = 5;
1092
1093         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1094         if (r)
1095                 goto err2;
1096
1097         r = dma_fence_wait_timeout(f, false, timeout);
1098         if (r == 0) {
1099                 r = -ETIMEDOUT;
1100                 goto err2;
1101         } else if (r < 0) {
1102                 goto err2;
1103         }
1104
1105         tmp = adev->wb.wb[index];
1106         if (tmp == 0xDEADBEEF)
1107                 r = 0;
1108         else
1109                 r = -EINVAL;
1110
1111 err2:
1112         amdgpu_ib_free(adev, &ib, NULL);
1113         dma_fence_put(f);
1114 err1:
1115         amdgpu_device_wb_free(adev, index);
1116         return r;
1117 }
1118
1120 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1121 {
1122         release_firmware(adev->gfx.pfp_fw);
1123         adev->gfx.pfp_fw = NULL;
1124         release_firmware(adev->gfx.me_fw);
1125         adev->gfx.me_fw = NULL;
1126         release_firmware(adev->gfx.ce_fw);
1127         adev->gfx.ce_fw = NULL;
1128         release_firmware(adev->gfx.rlc_fw);
1129         adev->gfx.rlc_fw = NULL;
1130         release_firmware(adev->gfx.mec_fw);
1131         adev->gfx.mec_fw = NULL;
1132         release_firmware(adev->gfx.mec2_fw);
1133         adev->gfx.mec2_fw = NULL;
1134
1135         kfree(adev->gfx.rlc.register_list_format);
1136 }
1137
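     /*
      * RLC v2.1 firmware carries three additional save/restore lists
      * (CNTL, GPM and SRM); record their versions, sizes and payload
      * offsets from the extended header.
      */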
1138 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1139 {
1140         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1141
1142         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1143         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1144         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1145         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1146         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1147         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1148         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1149         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1150         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1151         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1152         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1153         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1154         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1155         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1156                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1157 }
1158
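     /*
      * Newer CP firmware supports a combined register write-then-wait
      * operation; the per-ASIC version checks below gate that fast path
      * for the ME and MEC engines.
      */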
1159 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1160 {
1161         adev->gfx.me_fw_write_wait = false;
1162         adev->gfx.mec_fw_write_wait = false;
1163
1164         if ((adev->asic_type != CHIP_ARCTURUS) &&
1165             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1166             (adev->gfx.mec_feature_version < 46) ||
1167             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1168             (adev->gfx.pfp_feature_version < 46)))
1169                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1170
1171         switch (adev->asic_type) {
1172         case CHIP_VEGA10:
1173                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1174                     (adev->gfx.me_feature_version >= 42) &&
1175                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1176                     (adev->gfx.pfp_feature_version >= 42))
1177                         adev->gfx.me_fw_write_wait = true;
1178
1179                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1180                     (adev->gfx.mec_feature_version >= 42))
1181                         adev->gfx.mec_fw_write_wait = true;
1182                 break;
1183         case CHIP_VEGA12:
1184                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1185                     (adev->gfx.me_feature_version >= 44) &&
1186                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1187                     (adev->gfx.pfp_feature_version >= 44))
1188                         adev->gfx.me_fw_write_wait = true;
1189
1190                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1191                     (adev->gfx.mec_feature_version >= 44))
1192                         adev->gfx.mec_fw_write_wait = true;
1193                 break;
1194         case CHIP_VEGA20:
1195                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1196                     (adev->gfx.me_feature_version >= 44) &&
1197                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1198                     (adev->gfx.pfp_feature_version >= 44))
1199                         adev->gfx.me_fw_write_wait = true;
1200
1201                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1202                     (adev->gfx.mec_feature_version >= 44))
1203                         adev->gfx.mec_fw_write_wait = true;
1204                 break;
1205         case CHIP_RAVEN:
1206                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1207                     (adev->gfx.me_feature_version >= 42) &&
1208                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1209                     (adev->gfx.pfp_feature_version >= 42))
1210                         adev->gfx.me_fw_write_wait = true;
1211
1212                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1213                     (adev->gfx.mec_feature_version >= 42))
1214                         adev->gfx.mec_fw_write_wait = true;
1215                 break;
1216         default:
1217                 adev->gfx.me_fw_write_wait = true;
1218                 adev->gfx.mec_fw_write_wait = true;
1219                 break;
1220         }
1221 }
1222
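     /*
      * Boards on which GFXOFF is known to be unstable, matched by PCI
      * vendor/device, subsystem IDs and revision so it can be disabled.
      */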
1223 struct amdgpu_gfxoff_quirk {
1224         u16 chip_vendor;
1225         u16 chip_device;
1226         u16 subsys_vendor;
1227         u16 subsys_device;
1228         u8 revision;
1229 };
1230
1231 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1232         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1233         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1234         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1235         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1236         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1237         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1238         { 0, 0, 0, 0, 0 },
1239 };
1240
1241 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1242 {
1243         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1244
1245         while (p && p->chip_device != 0) {
1246                 if (pdev->vendor == p->chip_vendor &&
1247                     pdev->device == p->chip_device &&
1248                     pdev->subsystem_vendor == p->subsys_vendor &&
1249                     pdev->subsystem_device == p->subsys_device &&
1250                     pdev->revision == p->revision) {
1251                         return true;
1252                 }
1253                 ++p;
1254         }
1255         return false;
1256 }
1257
1258 static bool is_raven_kicker(struct amdgpu_device *adev)
1259 {
1260         return adev->pm.fw_version >= 0x41e2b;
1264 }
1265
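     /*
      * Disable GFXOFF on quirked boards and on early Raven parts
      * (non-kicker RLC older than 531, or pre-v2.1 RLC); where GFXOFF
      * remains enabled on an APU, also enable the GFX powergating flags.
      */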
1266 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1267 {
1268         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1269                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1270
1271         switch (adev->asic_type) {
1272         case CHIP_VEGA10:
1273         case CHIP_VEGA12:
1274         case CHIP_VEGA20:
1275                 break;
1276         case CHIP_RAVEN:
1277                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1278                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1279                     ((!is_raven_kicker(adev) &&
1280                       adev->gfx.rlc_fw_version < 531) ||
1281                      (adev->gfx.rlc_feature_version < 1) ||
1282                      !adev->gfx.rlc.is_rlc_v2_1))
1283                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1284
1285                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1286                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1287                                 AMD_PG_SUPPORT_CP |
1288                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1289                 break;
1290         case CHIP_RENOIR:
1291                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1292                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1293                                 AMD_PG_SUPPORT_CP |
1294                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1295                 break;
1296         default:
1297                 break;
1298         }
1299 }
1300
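     /*
      * Fetch and validate the PFP, ME and CE images, cache their ucode
      * and feature versions, and register them with the PSP front-door
      * loader when that load path is in use.
      */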
1301 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1302                                           const char *chip_name)
1303 {
1304         char fw_name[30];
1305         int err;
1306         struct amdgpu_firmware_info *info = NULL;
1307         const struct common_firmware_header *header = NULL;
1308         const struct gfx_firmware_header_v1_0 *cp_hdr;
1309
1310         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1311         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1312         if (err)
1313                 goto out;
1314         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1315         if (err)
1316                 goto out;
1317         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1318         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1319         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1320
1321         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1322         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1323         if (err)
1324                 goto out;
1325         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1326         if (err)
1327                 goto out;
1328         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1329         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1330         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1331
1332         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1333         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1334         if (err)
1335                 goto out;
1336         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1337         if (err)
1338                 goto out;
1339         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1340         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1341         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1342
1343         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1344                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1345                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1346                 info->fw = adev->gfx.pfp_fw;
1347                 header = (const struct common_firmware_header *)info->fw->data;
1348                 adev->firmware.fw_size +=
1349                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1350
1351                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1352                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1353                 info->fw = adev->gfx.me_fw;
1354                 header = (const struct common_firmware_header *)info->fw->data;
1355                 adev->firmware.fw_size +=
1356                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1357
1358                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1359                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1360                 info->fw = adev->gfx.ce_fw;
1361                 header = (const struct common_firmware_header *)info->fw->data;
1362                 adev->firmware.fw_size +=
1363                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1364         }
1365
1366 out:
1367         if (err) {
1368                 dev_err(adev->dev,
1369                         "gfx9: Failed to load firmware \"%s\"\n",
1370                         fw_name);
1371                 release_firmware(adev->gfx.pfp_fw);
1372                 adev->gfx.pfp_fw = NULL;
1373                 release_firmware(adev->gfx.me_fw);
1374                 adev->gfx.me_fw = NULL;
1375                 release_firmware(adev->gfx.ce_fw);
1376                 adev->gfx.ce_fw = NULL;
1377         }
1378         return err;
1379 }
1380
1381 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1382                                           const char *chip_name)
1383 {
1384         char fw_name[30];
1385         int err;
1386         struct amdgpu_firmware_info *info = NULL;
1387         const struct common_firmware_header *header = NULL;
1388         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1389         unsigned int *tmp = NULL;
1390         unsigned int i = 0;
1391         uint16_t version_major;
1392         uint16_t version_minor;
1393         uint32_t smu_version;
1394
1395         /*
1396          * For Picasso on an AM4-socket board, use picasso_rlc_am4.bin
1397          * instead of picasso_rlc.bin.
1398          * Detection method:
1399          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1400          *          or revision >= 0xD8 && revision <= 0xDF
1401          * anything else is PCO FP5
1402          */
1403         if (!strcmp(chip_name, "picasso") &&
1404                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1405                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1406                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1407         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1408                 (smu_version >= 0x41e2b))
1409                 /*
1410                  * The SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
1411                  */
1412                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1413         else
1414                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1415         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1416         if (err)
1417                 goto out;
1418         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1419         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1420
1421         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1422         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1423         if (version_major == 2 && version_minor == 1)
1424                 adev->gfx.rlc.is_rlc_v2_1 = true;
1425
1426         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1427         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1428         adev->gfx.rlc.save_and_restore_offset =
1429                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1430         adev->gfx.rlc.clear_state_descriptor_offset =
1431                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1432         adev->gfx.rlc.avail_scratch_ram_locations =
1433                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1434         adev->gfx.rlc.reg_restore_list_size =
1435                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1436         adev->gfx.rlc.reg_list_format_start =
1437                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1438         adev->gfx.rlc.reg_list_format_separate_start =
1439                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1440         adev->gfx.rlc.starting_offsets_start =
1441                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1442         adev->gfx.rlc.reg_list_format_size_bytes =
1443                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1444         adev->gfx.rlc.reg_list_size_bytes =
1445                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1446         adev->gfx.rlc.register_list_format =
1447                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1448                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1449         if (!adev->gfx.rlc.register_list_format) {
1450                 err = -ENOMEM;
1451                 goto out;
1452         }
1453
1454         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1455                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1456         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1457                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1458
1459         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1460
1461         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1462                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1463         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1464                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1465
1466         if (adev->gfx.rlc.is_rlc_v2_1)
1467                 gfx_v9_0_init_rlc_ext_microcode(adev);
1468
1469         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1470                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1471                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1472                 info->fw = adev->gfx.rlc_fw;
1473                 header = (const struct common_firmware_header *)info->fw->data;
1474                 adev->firmware.fw_size +=
1475                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1476
1477                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1478                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1479                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1480                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1481                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1482                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1483                         info->fw = adev->gfx.rlc_fw;
1484                         adev->firmware.fw_size +=
1485                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1486
1487                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1488                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1489                         info->fw = adev->gfx.rlc_fw;
1490                         adev->firmware.fw_size +=
1491                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1492
1493                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1494                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1495                         info->fw = adev->gfx.rlc_fw;
1496                         adev->firmware.fw_size +=
1497                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1498                 }
1499         }
1500
1501 out:
1502         if (err) {
1503                 dev_err(adev->dev,
1504                         "gfx9: Failed to load firmware \"%s\"\n",
1505                         fw_name);
1506                 release_firmware(adev->gfx.rlc_fw);
1507                 adev->gfx.rlc_fw = NULL;
1508         }
1509         return err;
1510 }
1511
1512 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1513                                           const char *chip_name)
1514 {
1515         char fw_name[30];
1516         int err;
1517         struct amdgpu_firmware_info *info = NULL;
1518         const struct common_firmware_header *header = NULL;
1519         const struct gfx_firmware_header_v1_0 *cp_hdr;
1520
1521         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1522         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1523         if (err)
1524                 goto out;
1525         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1526         if (err)
1527                 goto out;
1528         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1529         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1530         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1531
1533         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1534         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1535         if (!err) {
1536                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1537                 if (err)
1538                         goto out;
1539                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1540                         adev->gfx.mec2_fw->data;
1541                 adev->gfx.mec2_fw_version =
1542                         le32_to_cpu(cp_hdr->header.ucode_version);
1543                 adev->gfx.mec2_feature_version =
1544                         le32_to_cpu(cp_hdr->ucode_feature_version);
1545         } else {
1546                 err = 0;
1547                 adev->gfx.mec2_fw = NULL;
1548         }
1549
1550         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1551                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1552                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1553                 info->fw = adev->gfx.mec_fw;
1554                 header = (const struct common_firmware_header *)info->fw->data;
1555                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1556                 adev->firmware.fw_size +=
1557                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1558
1559                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1560                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1561                 info->fw = adev->gfx.mec_fw;
1562                 adev->firmware.fw_size +=
1563                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1564
1565                 if (adev->gfx.mec2_fw) {
1566                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1567                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1568                         info->fw = adev->gfx.mec2_fw;
1569                         header = (const struct common_firmware_header *)info->fw->data;
1570                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1571                         adev->firmware.fw_size +=
1572                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1573
1574                         /* TODO: determine if MEC2 JT FW loading can be
1575                          * removed for all GFX v9 ASICs and newer */
1576                         if (adev->asic_type != CHIP_ARCTURUS &&
1577                             adev->asic_type != CHIP_RENOIR) {
1578                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1579                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1580                                 info->fw = adev->gfx.mec2_fw;
1581                                 adev->firmware.fw_size +=
1582                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1583                                         PAGE_SIZE);
1584                         }
1585                 }
1586         }
1587
1588 out:
1589         gfx_v9_0_check_if_need_gfxoff(adev);
1590         gfx_v9_0_check_fw_write_wait(adev);
1591         if (err) {
1592                 dev_err(adev->dev,
1593                         "gfx9: Failed to load firmware \"%s\"\n",
1594                         fw_name);
1595                 release_firmware(adev->gfx.mec_fw);
1596                 adev->gfx.mec_fw = NULL;
1597                 release_firmware(adev->gfx.mec2_fw);
1598                 adev->gfx.mec2_fw = NULL;
1599         }
1600         return err;
1601 }
1602
1603 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1604 {
1605         const char *chip_name;
1606         int r;
1607
1608         DRM_DEBUG("\n");
1609
1610         switch (adev->asic_type) {
1611         case CHIP_VEGA10:
1612                 chip_name = "vega10";
1613                 break;
1614         case CHIP_VEGA12:
1615                 chip_name = "vega12";
1616                 break;
1617         case CHIP_VEGA20:
1618                 chip_name = "vega20";
1619                 break;
1620         case CHIP_RAVEN:
1621                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1622                         chip_name = "raven2";
1623                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1624                         chip_name = "picasso";
1625                 else
1626                         chip_name = "raven";
1627                 break;
1628         case CHIP_ARCTURUS:
1629                 chip_name = "arcturus";
1630                 break;
1631         case CHIP_RENOIR:
1632                 chip_name = "renoir";
1633                 break;
1634         default:
1635                 BUG();
1636         }
1637
1638         /* No CPG in Arcturus */
1639         if (adev->asic_type != CHIP_ARCTURUS) {
1640                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1641                 if (r)
1642                         return r;
1643         }
1644
1645         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1646         if (r)
1647                 return r;
1648
1649         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1652
1653         return r;
1654 }
1655
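     /*
      * Clear-state buffer size in dwords: begin-preamble (2) + context
      * control (3) + a SET_CONTEXT_REG header pair plus payload per
      * extent + end-preamble (2) + CLEAR_STATE (2).
      */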
1656 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1657 {
1658         u32 count = 0;
1659         const struct cs_section_def *sect = NULL;
1660         const struct cs_extent_def *ext = NULL;
1661
1662         /* begin clear state */
1663         count += 2;
1664         /* context control state */
1665         count += 3;
1666
1667         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1668                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1669                         if (sect->id == SECT_CONTEXT)
1670                                 count += 2 + ext->reg_count;
1671                         else
1672                                 return 0;
1673                 }
1674         }
1675
1676         /* end clear state */
1677         count += 2;
1678         /* clear state */
1679         count += 2;
1680
1681         return count;
1682 }
1683
1684 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1685                                     volatile u32 *buffer)
1686 {
1687         u32 count = 0, i;
1688         const struct cs_section_def *sect = NULL;
1689         const struct cs_extent_def *ext = NULL;
1690
1691         if (adev->gfx.rlc.cs_data == NULL)
1692                 return;
1693         if (buffer == NULL)
1694                 return;
1695
1696         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1697         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1698
1699         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1700         buffer[count++] = cpu_to_le32(0x80000000);
1701         buffer[count++] = cpu_to_le32(0x80000000);
1702
1703         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1704                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1705                         if (sect->id == SECT_CONTEXT) {
1706                                 buffer[count++] =
1707                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1708                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1709                                                 PACKET3_SET_CONTEXT_REG_START);
1710                                 for (i = 0; i < ext->reg_count; i++)
1711                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1712                         } else {
1713                                 return;
1714                         }
1715                 }
1716         }
1717
1718         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1719         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1720
1721         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1722         buffer[count++] = cpu_to_le32(0);
1723 }
1724
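     /*
      * Walk the CU bitmap per SE/SH and mark the first N CUs always-on
      * (N = 4 on APUs, 8 on Vega12, 12 otherwise); the first two of
      * those are also programmed into the PG always-on CU mask.
      */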
1725 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1726 {
1727         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1728         uint32_t pg_always_on_cu_num = 2;
1729         uint32_t always_on_cu_num;
1730         uint32_t i, j, k;
1731         uint32_t mask, cu_bitmap, counter;
1732
1733         if (adev->flags & AMD_IS_APU)
1734                 always_on_cu_num = 4;
1735         else if (adev->asic_type == CHIP_VEGA12)
1736                 always_on_cu_num = 8;
1737         else
1738                 always_on_cu_num = 12;
1739
1740         mutex_lock(&adev->grbm_idx_mutex);
1741         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1742                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1743                         mask = 1;
1744                         cu_bitmap = 0;
1745                         counter = 0;
1746                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1747
1748                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1749                                 if (cu_info->bitmap[i][j] & mask) {
1750                                         if (counter == pg_always_on_cu_num)
1751                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1752                                         if (counter < always_on_cu_num)
1753                                                 cu_bitmap |= mask;
1754                                         else
1755                                                 break;
1756                                         counter++;
1757                                 }
1758                                 mask <<= 1;
1759                         }
1760
1761                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1762                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1763                 }
1764         }
1765         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1766         mutex_unlock(&adev->grbm_idx_mutex);
1767 }
1768
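     /*
      * Program the RLC load-balancing (LBPW) thresholds, counters and CU
      * masks with the values used for Raven; gfx_v9_4_init_lbpw() below
      * does the same for Vega20 with different thresholds.
      */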
1769 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1770 {
1771         uint32_t data;
1772
1773         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1774         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1775         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1778
1779         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1780         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1781
1782         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1783         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1784
1785         mutex_lock(&adev->grbm_idx_mutex);
1786         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1787         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1788         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1789
1790         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1791         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1792         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1793         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1794         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1795
1796         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1797         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1798         data &= 0x0000FFFF;
1799         data |= 0x00C00000;
1800         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1801
1802         /*
1803          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1804          * programmed in gfx_v9_0_init_always_on_cu_mask()
1805          */
1806
1807         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1808          * but is used here for the RLC_LB_CNTL configuration */
1809         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1810         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1811         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1812         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1813         mutex_unlock(&adev->grbm_idx_mutex);
1814
1815         gfx_v9_0_init_always_on_cu_mask(adev);
1816 }
1817
1818 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1819 {
1820         uint32_t data;
1821
1822         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1823         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1824         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1827
1828         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1829         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1830
1831         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1832         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1833
1834         mutex_lock(&adev->grbm_idx_mutex);
1835         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1836         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1837         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1838
1839         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1840         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1841         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1842         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1843         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1844
1845         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1846         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1847         data &= 0x0000FFFF;
1848         data |= 0x00C00000;
1849         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1850
1851         /*
1852          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1853          * programmed in gfx_v9_0_init_always_on_cu_mask()
1854          */
1855
1856         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1857          * but is used here for the RLC_LB_CNTL configuration */
1858         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1859         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1860         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1861         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1862         mutex_unlock(&adev->grbm_idx_mutex);
1863
1864         gfx_v9_0_init_always_on_cu_mask(adev);
1865 }
1866
1867 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1868 {
1869         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1870 }
1871
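     /* Number of CP jump table slots in the RLC cp_table; presumably one
      * each for PFP, ME, CE, MEC1 and MEC2.
      */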
1872 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1873 {
1874         return 5;
1875 }
1876
1877 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1878 {
1879         const struct cs_section_def *cs_data;
1880         int r;
1881
1882         adev->gfx.rlc.cs_data = gfx9_cs_data;
1883
1884         cs_data = adev->gfx.rlc.cs_data;
1885
1886         if (cs_data) {
1887                 /* init clear state block */
1888                 r = amdgpu_gfx_rlc_init_csb(adev);
1889                 if (r)
1890                         return r;
1891         }
1892
1893         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1894                 /* TODO: double check the cp_table_size for RV */
1895                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1896                 r = amdgpu_gfx_rlc_init_cpt(adev);
1897                 if (r)
1898                         return r;
1899         }
1900
1901         switch (adev->asic_type) {
1902         case CHIP_RAVEN:
1903                 gfx_v9_0_init_lbpw(adev);
1904                 break;
1905         case CHIP_VEGA20:
1906                 gfx_v9_4_init_lbpw(adev);
1907                 break;
1908         default:
1909                 break;
1910         }
1911
1912         /* init spm vmid with 0xf */
1913         if (adev->gfx.rlc.funcs->update_spm_vmid)
1914                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1915
1916         return 0;
1917 }
1918
1919 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1920 {
1921         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1922         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1923 }
1924
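     /*
      * Take ownership of the compute queues, allocate the HPD EOP buffer
      * in VRAM (GFX9_MEC_HPD_SIZE bytes per enabled compute ring) and
      * stage a CPU-written copy of the MEC ucode in a GTT BO.
      */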
1925 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1926 {
1927         int r;
1928         u32 *hpd;
1929         const __le32 *fw_data;
1930         unsigned fw_size;
1931         u32 *fw;
1932         size_t mec_hpd_size;
1934         const struct gfx_firmware_header_v1_0 *mec_hdr;
1935
1936         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1937
1938         /* take ownership of the relevant compute queues */
1939         amdgpu_gfx_compute_queue_acquire(adev);
1940         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1941
1942         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1943                                       AMDGPU_GEM_DOMAIN_VRAM,
1944                                       &adev->gfx.mec.hpd_eop_obj,
1945                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1946                                       (void **)&hpd);
1947         if (r) {
1948                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1949                 gfx_v9_0_mec_fini(adev);
1950                 return r;
1951         }
1952
1953         memset(hpd, 0, mec_hpd_size);
1954
1955         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1956         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1957
1958         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1959
1960         fw_data = (const __le32 *)
1961                 (adev->gfx.mec_fw->data +
1962                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1963         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1964
1965         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1966                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1967                                       &adev->gfx.mec.mec_fw_obj,
1968                                       &adev->gfx.mec.mec_fw_gpu_addr,
1969                                       (void **)&fw);
1970         if (r) {
1971                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1972                 gfx_v9_0_mec_fini(adev);
1973                 return r;
1974         }
1975
1976         memcpy(fw, fw_data, fw_size);
1977
1978         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1979         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1980
1981         return 0;
1982 }
1983
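     /*
      * Wave-debug readers: SQ_IND_INDEX selects a register of a given
      * wave/SIMD and SQ_IND_DATA returns its value; the AUTO_INCR
      * variant streams @num consecutive registers (SGPRs/VGPRs) to @out.
      */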
1984 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1985 {
1986         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1987                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1988                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1989                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1990                 (SQ_IND_INDEX__FORCE_READ_MASK));
1991         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1992 }
1993
1994 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1995                            uint32_t wave, uint32_t thread,
1996                            uint32_t regno, uint32_t num, uint32_t *out)
1997 {
1998         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1999                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2000                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2001                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2002                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2003                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2004                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2005         while (num--)
2006                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2007 }
2008
2009 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2010 {
2011         /* type 1 wave data */
2012         dst[(*no_fields)++] = 1;
2013         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2014         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2015         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2016         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2017         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2018         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2019         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2020         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2021         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2022         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2023         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2024         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2025         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2026         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2027 }
2028
2029 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2030                                      uint32_t wave, uint32_t start,
2031                                      uint32_t size, uint32_t *dst)
2032 {
2033         wave_read_regs(
2034                 adev, simd, wave, 0,
2035                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2036 }
2037
2038 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2039                                      uint32_t wave, uint32_t thread,
2040                                      uint32_t start, uint32_t size,
2041                                      uint32_t *dst)
2042 {
2043         wave_read_regs(
2044                 adev, simd, wave, thread,
2045                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2046 }
2047
2048 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2049                                   u32 me, u32 pipe, u32 q, u32 vm)
2050 {
2051         soc15_grbm_select(adev, me, pipe, q, vm);
2052 }
2053
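     /*
      * Per-ASIC function tables; gfx_v9_4_gfx_funcs swaps in the
      * Arcturus (gfx 9.4.x) RAS inject/query/reset callbacks.
      */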
2054 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2055         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2056         .select_se_sh = &gfx_v9_0_select_se_sh,
2057         .read_wave_data = &gfx_v9_0_read_wave_data,
2058         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2059         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2060         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2061         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2062         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2063         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2064 };
2065
2066 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2067         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2068         .select_se_sh = &gfx_v9_0_select_se_sh,
2069         .read_wave_data = &gfx_v9_0_read_wave_data,
2070         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2071         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2072         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2073         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2074         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2075         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2076 };
2077
2078 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2079 {
2080         u32 gb_addr_config;
2081         int err;
2082
2083         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2084
2085         switch (adev->asic_type) {
2086         case CHIP_VEGA10:
2087                 adev->gfx.config.max_hw_contexts = 8;
2088                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2089                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2090                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2091                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2092                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2093                 break;
2094         case CHIP_VEGA12:
2095                 adev->gfx.config.max_hw_contexts = 8;
2096                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2097                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2098                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2099                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2100                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2101                 DRM_INFO("fix gfx.config for vega12\n");
2102                 break;
2103         case CHIP_VEGA20:
2104                 adev->gfx.config.max_hw_contexts = 8;
2105                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2106                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2107                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2108                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2109                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2110                 gb_addr_config &= ~0xf3e777ff;
2111                 gb_addr_config |= 0x22014042;
2112                 /* check vbios table if gpu info is not available */
2113                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2114                 if (err)
2115                         return err;
2116                 break;
2117         case CHIP_RAVEN:
2118                 adev->gfx.config.max_hw_contexts = 8;
2119                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2120                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2121                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2122                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2123                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2124                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2125                 else
2126                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2127                 break;
2128         case CHIP_ARCTURUS:
2129                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2130                 adev->gfx.config.max_hw_contexts = 8;
2131                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2132                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2133                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2134                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2135                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2136                 gb_addr_config &= ~0xf3e777ff;
2137                 gb_addr_config |= 0x22014042;
2138                 break;
2139         case CHIP_RENOIR:
2140                 adev->gfx.config.max_hw_contexts = 8;
2141                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2144                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2145                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2146                 gb_addr_config &= ~0xf3e777ff;
2147                 gb_addr_config |= 0x22010042;
2148                 break;
2149         default:
2150                 BUG();
2151                 break;
2152         }
2153
2154         adev->gfx.config.gb_addr_config = gb_addr_config;
2155
2156         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2157                         REG_GET_FIELD(
2158                                         adev->gfx.config.gb_addr_config,
2159                                         GB_ADDR_CONFIG,
2160                                         NUM_PIPES);
2161
2162         adev->gfx.config.max_tile_pipes =
2163                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2164
2165         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2166                         REG_GET_FIELD(
2167                                         adev->gfx.config.gb_addr_config,
2168                                         GB_ADDR_CONFIG,
2169                                         NUM_BANKS);
2170         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2171                         REG_GET_FIELD(
2172                                         adev->gfx.config.gb_addr_config,
2173                                         GB_ADDR_CONFIG,
2174                                         MAX_COMPRESSED_FRAGS);
2175         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2176                         REG_GET_FIELD(
2177                                         adev->gfx.config.gb_addr_config,
2178                                         GB_ADDR_CONFIG,
2179                                         NUM_RB_PER_SE);
2180         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2181                         REG_GET_FIELD(
2182                                         adev->gfx.config.gb_addr_config,
2183                                         GB_ADDR_CONFIG,
2184                                         NUM_SHADER_ENGINES);
2185         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2186                         REG_GET_FIELD(
2187                                         adev->gfx.config.gb_addr_config,
2188                                         GB_ADDR_CONFIG,
2189                                         PIPE_INTERLEAVE_SIZE));
2190
2191         return 0;
2192 }
2193
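     /*
      * Per-ring setup: doorbell index, a GFX9_MEC_HPD_SIZE slice of the
      * shared EOP buffer, the matching per-pipe EOP interrupt source and
      * a hardware priority derived from the queue policy.
      */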
2194 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2195                                       int mec, int pipe, int queue)
2196 {
2197         int r;
2198         unsigned irq_type;
2199         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2200         unsigned int hw_prio;
2201
2204         /* mec0 is me1 */
2205         ring->me = mec + 1;
2206         ring->pipe = pipe;
2207         ring->queue = queue;
2208
2209         ring->ring_obj = NULL;
2210         ring->use_doorbell = true;
2211         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2212         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2213                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2214         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2215
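             /* EOP interrupt sources are numbered one per pipe and
              * grouped by ME, starting at MEC1 pipe 0
              */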
2216         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2217                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2218                 + ring->pipe;
2219         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
2220                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2221         /* type-2 packets are deprecated on MEC, use type-3 instead */
2222         r = amdgpu_ring_init(adev, ring, 1024,
2223                              &adev->gfx.eop_irq, irq_type, hw_prio);
2224         if (r)
2225                 return r;
2226
2228         return 0;
2229 }
2230
2231 static int gfx_v9_0_sw_init(void *handle)
2232 {
2233         int i, j, k, r, ring_id;
2234         struct amdgpu_ring *ring;
2235         struct amdgpu_kiq *kiq;
2236         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2237
2238         switch (adev->asic_type) {
2239         case CHIP_VEGA10:
2240         case CHIP_VEGA12:
2241         case CHIP_VEGA20:
2242         case CHIP_RAVEN:
2243         case CHIP_ARCTURUS:
2244         case CHIP_RENOIR:
2245                 adev->gfx.mec.num_mec = 2;
2246                 break;
2247         default:
2248                 adev->gfx.mec.num_mec = 1;
2249                 break;
2250         }
2251
2252         adev->gfx.mec.num_pipe_per_mec = 4;
2253         adev->gfx.mec.num_queue_per_pipe = 8;
2254
2255         /* EOP Event */
2256         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2257         if (r)
2258                 return r;
2259
2260         /* Privileged reg */
2261         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2262                               &adev->gfx.priv_reg_irq);
2263         if (r)
2264                 return r;
2265
2266         /* Privileged inst */
2267         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2268                               &adev->gfx.priv_inst_irq);
2269         if (r)
2270                 return r;
2271
2272         /* ECC error */
2273         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2274                               &adev->gfx.cp_ecc_error_irq);
2275         if (r)
2276                 return r;
2277
2278         /* FUE error */
2279         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2280                               &adev->gfx.cp_ecc_error_irq);
2281         if (r)
2282                 return r;
2283
2284         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2285
2286         gfx_v9_0_scratch_init(adev);
2287
2288         r = gfx_v9_0_init_microcode(adev);
2289         if (r) {
2290                 DRM_ERROR("Failed to load gfx firmware!\n");
2291                 return r;
2292         }
2293
2294         r = adev->gfx.rlc.funcs->init(adev);
2295         if (r) {
2296                 DRM_ERROR("Failed to init rlc BOs!\n");
2297                 return r;
2298         }
2299
2300         r = gfx_v9_0_mec_init(adev);
2301         if (r) {
2302                 DRM_ERROR("Failed to init MEC BOs!\n");
2303                 return r;
2304         }
2305
2306         /* set up the gfx ring */
2307         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2308                 ring = &adev->gfx.gfx_ring[i];
2309                 ring->ring_obj = NULL;
2310                 if (!i)
2311                         sprintf(ring->name, "gfx");
2312                 else
2313                         sprintf(ring->name, "gfx_%d", i);
2314                 ring->use_doorbell = true;
2315                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2316                 r = amdgpu_ring_init(adev, ring, 1024,
2317                                      &adev->gfx.eop_irq,
2318                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2319                                      AMDGPU_RING_PRIO_DEFAULT);
2320                 if (r)
2321                         return r;
2322         }
2323
2324         /* set up the compute queues - allocate horizontally across pipes */
2325         ring_id = 0;
2326         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2327                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2328                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2329                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2330                                         continue;
2331
2332                                 r = gfx_v9_0_compute_ring_init(adev,
2333                                                                ring_id,
2334                                                                i, k, j);
2335                                 if (r)
2336                                         return r;
2337
2338                                 ring_id++;
2339                         }
2340                 }
2341         }
2342
2343         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2344         if (r) {
2345                 DRM_ERROR("Failed to init KIQ BOs!\n");
2346                 return r;
2347         }
2348
2349         kiq = &adev->gfx.kiq;
2350         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2351         if (r)
2352                 return r;
2353
2354         /* create MQD for all compute queues as well as KIQ for SR-IOV case */
2355         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2356         if (r)
2357                 return r;
2358
2359         adev->gfx.ce_ram_size = 0x8000;
2360
2361         r = gfx_v9_0_gpu_early_init(adev);
2362         if (r)
2363                 return r;
2364
2365         return 0;
2366 }
2367
2369 static int gfx_v9_0_sw_fini(void *handle)
2370 {
2371         int i;
2372         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2373
2374         amdgpu_gfx_ras_fini(adev);
2375
2376         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2377                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2378         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2379                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2380
2381         amdgpu_gfx_mqd_sw_fini(adev);
2382         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2383         amdgpu_gfx_kiq_fini(adev);
2384
2385         gfx_v9_0_mec_fini(adev);
2386         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2387         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2388                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2389                                 &adev->gfx.rlc.cp_table_gpu_addr,
2390                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2391         }
2392         gfx_v9_0_free_microcode(adev);
2393
2394         return 0;
2395 }
2396
2398 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2399 {
2400         /* TODO */
2401 }
2402
2403 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2404 {
2405         u32 data;
2406
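             /* an index of 0xffffffff selects broadcast to every
              * instance / shader engine / shader array
              */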
2407         if (instance == 0xffffffff)
2408                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2409         else
2410                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2411
2412         if (se_num == 0xffffffff)
2413                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2414         else
2415                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2416
2417         if (sh_num == 0xffffffff)
2418                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2419         else
2420                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2421
2422         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2423 }
2424
2425 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2426 {
2427         u32 data, mask;
2428
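             /* CC_RB_BACKEND_DISABLE holds the fused-off render backends,
              * GC_USER_RB_BACKEND_DISABLE the user/driver-disabled ones;
              * the inverted union is the bitmap of usable RBs
              */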
2429         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2430         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2431
2432         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2433         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2434
2435         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2436                                          adev->gfx.config.max_sh_per_se);
2437
2438         return (~data) & mask;
2439 }
2440
2441 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2442 {
2443         int i, j;
2444         u32 data;
2445         u32 active_rbs = 0;
2446         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2447                                         adev->gfx.config.max_sh_per_se;
2448
2449         mutex_lock(&adev->grbm_idx_mutex);
2450         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2451                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2452                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2453                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2454                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2455                                                rb_bitmap_width_per_sh);
2456                 }
2457         }
2458         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2459         mutex_unlock(&adev->grbm_idx_mutex);
2460
2461         adev->gfx.config.backend_enable_mask = active_rbs;
2462         adev->gfx.config.num_rbs = hweight32(active_rbs);
2463 }
2464
2465 #define DEFAULT_SH_MEM_BASES    (0x6000)
2466 #define FIRST_COMPUTE_VMID      (8)
2467 #define LAST_COMPUTE_VMID       (16)
2468 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2469 {
2470         int i;
2471         uint32_t sh_mem_config;
2472         uint32_t sh_mem_bases;
2473
2474         /*
2475          * Configure apertures:
2476          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2477          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2478          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2479          */
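             /* SH_MEM_BASES packs the private aperture base in the low
              * 16 bits and the shared aperture base in the high 16 bits
              */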
2480         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2481
2482         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2483                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2484                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2485
2486         mutex_lock(&adev->srbm_mutex);
2487         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2488                 soc15_grbm_select(adev, 0, 0, 0, i);
2489                 /* CP and shaders */
2490                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2491                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2492         }
2493         soc15_grbm_select(adev, 0, 0, 0, 0);
2494         mutex_unlock(&adev->srbm_mutex);
2495
2496         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2497          * access. These should be enabled by FW for target VMIDs. */
2498         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2499                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2500                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2501                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2502                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2503         }
2504 }
2505
2506 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2507 {
2508         int vmid;
2509
2510         /*
2511          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2512          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2513          * the driver can enable them for graphics. VMID0 should maintain
2514          * access so that HWS firmware can save/restore entries.
2515          */
2516         for (vmid = 1; vmid < 16; vmid++) {
2517                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2518                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2519                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2520                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2521         }
2522 }
2523
2524 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2525 {
2526         uint32_t tmp;
2527
2528         switch (adev->asic_type) {
2529         case CHIP_ARCTURUS:
2530                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2531                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2532                                         DISABLE_BARRIER_WAITCNT, 1);
2533                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2534                 break;
2535         default:
2536                 break;
2537         }
2538 }
2539
2540 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2541 {
2542         u32 tmp;
2543         int i;
2544
2545         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2546
2547         gfx_v9_0_tiling_mode_table_init(adev);
2548
2549         gfx_v9_0_setup_rb(adev);
2550         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2551         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2552
2553         /* XXX SH_MEM regs */
2554         /* where to put LDS, scratch, GPUVM in FSA64 space */
2555         mutex_lock(&adev->srbm_mutex);
2556         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2557                 soc15_grbm_select(adev, 0, 0, 0, i);
2558                 /* CP and shaders */
2559                 if (i == 0) {
2560                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2561                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2562                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2563                                             !!amdgpu_noretry);
2564                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2565                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2566                 } else {
2567                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2568                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2569                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2570                                             !!amdgpu_noretry);
2571                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2572                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2573                                 (adev->gmc.private_aperture_start >> 48));
2574                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2575                                 (adev->gmc.shared_aperture_start >> 48));
2576                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2577                 }
2578         }
2579         soc15_grbm_select(adev, 0, 0, 0, 0);
2580
2581         mutex_unlock(&adev->srbm_mutex);
2582
2583         gfx_v9_0_init_compute_vmid(adev);
2584         gfx_v9_0_init_gds_vmid(adev);
2585         gfx_v9_0_init_sq_config(adev);
2586 }
2587
2588 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2589 {
2590         u32 i, j, k;
2591         u32 mask;
2592
2593         mutex_lock(&adev->grbm_idx_mutex);
2594         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2595                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2596                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2597                         for (k = 0; k < adev->usec_timeout; k++) {
2598                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2599                                         break;
2600                                 udelay(1);
2601                         }
2602                         if (k == adev->usec_timeout) {
2603                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2604                                                       0xffffffff, 0xffffffff);
2605                                 mutex_unlock(&adev->grbm_idx_mutex);
2606                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2607                                          i, j);
2608                                 return;
2609                         }
2610                 }
2611         }
2612         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2613         mutex_unlock(&adev->grbm_idx_mutex);
2614
2615         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2616                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2617                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2618                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2619         for (k = 0; k < adev->usec_timeout; k++) {
2620                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2621                         break;
2622                 udelay(1);
2623         }
2624 }
2625
2626 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2627                                                bool enable)
2628 {
2629         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2630
2631         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2632         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2633         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2634         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2635
2636         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2637 }
2638
2639 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2640 {
2641         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2642         /* CSIB: program the clear-state indirect buffer address and size */
2643         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2644                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2645         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2646                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2647         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2648                         adev->gfx.rlc.clear_state_size);
2649 }
2650
2651 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2652                                 int indirect_offset,
2653                                 int list_size,
2654                                 int *unique_indirect_regs,
2655                                 int unique_indirect_reg_count,
2656                                 int *indirect_start_offsets,
2657                                 int *indirect_start_offsets_count,
2658                                 int max_start_offsets_count)
2659 {
2660         int idx;
2661
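             /* each indirect block is terminated by 0xFFFFFFFF; entries
              * are three dwords, with the third dword holding the
              * indirect register offset
              */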
2662         for (; indirect_offset < list_size; indirect_offset++) {
2663                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2664                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2665                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2666
2667                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2668                         indirect_offset += 2;
2669
2670                         /* look for the matching index */
2671                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2672                                 if (unique_indirect_regs[idx] ==
2673                                         register_list_format[indirect_offset] ||
2674                                         !unique_indirect_regs[idx])
2675                                         break;
2676                         }
2677
2678                         BUG_ON(idx >= unique_indirect_reg_count);
2679
2680                         if (!unique_indirect_regs[idx])
2681                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2682
2683                         indirect_offset++;
2684                 }
2685         }
2686 }
2687
2688 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2689 {
2690         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2691         int unique_indirect_reg_count = 0;
2692
2693         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2694         int indirect_start_offsets_count = 0;
2695
2696         int list_size = 0;
2697         int i = 0, j = 0;
2698         u32 tmp = 0;
2699
2700         u32 *register_list_format =
2701                 kmemdup(adev->gfx.rlc.register_list_format,
2702                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2703         if (!register_list_format)
2704                 return -ENOMEM;
2705
2706         /* setup unique_indirect_regs array and indirect_start_offsets array */
2707         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2708         gfx_v9_1_parse_ind_reg_list(register_list_format,
2709                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2710                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2711                                     unique_indirect_regs,
2712                                     unique_indirect_reg_count,
2713                                     indirect_start_offsets,
2714                                     &indirect_start_offsets_count,
2715                                     ARRAY_SIZE(indirect_start_offsets));
2716
2717         /* enable auto inc in case it is disabled */
2718         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2719         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2720         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2721
2722         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2723         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2724                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2725         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2726                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2727                         adev->gfx.rlc.register_restore[i]);
2728
2729         /* load indirect register */
2730         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2731                 adev->gfx.rlc.reg_list_format_start);
2732
2733         /* direct register portion */
2734         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2735                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2736                         register_list_format[i]);
2737
2738         /* indirect register portion */
2739         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2740                 if (register_list_format[i] == 0xFFFFFFFF) {
2741                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2742                         continue;
2743                 }
2744
2745                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2746                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2747
2748                 for (j = 0; j < unique_indirect_reg_count; j++) {
2749                         if (register_list_format[i] == unique_indirect_regs[j]) {
2750                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2751                                 break;
2752                         }
2753                 }
2754
2755                 BUG_ON(j >= unique_indirect_reg_count);
2756
2757                 i++;
2758         }
2759
2760         /* set save/restore list size */
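             /* presumably counted in register/value pairs, hence half
              * the dword count
              */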
2761         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2762         list_size = list_size >> 1;
2763         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2764                 adev->gfx.rlc.reg_restore_list_size);
2765         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2766
2767         /* write the starting offsets to RLC scratch ram */
2768         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2769                 adev->gfx.rlc.starting_offsets_start);
2770         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2771                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2772                        indirect_start_offsets[i]);
2773
2774         /* load unique indirect regs */
2775         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2776                 if (unique_indirect_regs[i] != 0) {
2777                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2778                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2779                                unique_indirect_regs[i] & 0x3FFFF);
2780
2781                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2782                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2783                                unique_indirect_regs[i] >> 20);
2784                 }
2785         }
2786
2787         kfree(register_list_format);
2788         return 0;
2789 }
2790
2791 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2792 {
2793         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2794 }
2795
2796 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2797                                              bool enable)
2798 {
2799         uint32_t data = 0;
2800         uint32_t default_data = 0;
2801
2802         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2803         if (enable) {
2804                 /* enable GFXIP control over CGPG */
2805                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2806                 if (default_data != data)
2807                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2808
2809                 /* update status */
2810                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2811                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2812                 if (default_data != data)
2813                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2814         } else {
2815                 /* restore GFXIP control over CGPG */
2816                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2817                 if (default_data != data)
2818                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2819         }
2819         }
2820 }
2821
2822 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2823 {
2824         uint32_t data = 0;
2825
2826         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2827                               AMD_PG_SUPPORT_GFX_SMG |
2828                               AMD_PG_SUPPORT_GFX_DMG)) {
2829                 /* init IDLE_POLL_COUNT = 60 */
2830                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2831                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2832                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2833                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2834
2835                 /* init RLC PG Delay */
2836                 data = 0;
2837                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2838                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2839                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2840                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2841                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2842
2843                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2844                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2845                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2846                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2847
2848                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2849                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2850                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2851                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2852
2853                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2854                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2855
2856                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2857                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2858                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2859
2860                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2861         }
2862 }
2863
2864 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2865                                                 bool enable)
2866 {
2867         uint32_t data = 0;
2868         uint32_t default_data = 0;
2869
2870         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2871         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2872                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2873                              enable ? 1 : 0);
2874         if (default_data != data)
2875                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2876 }
2877
2878 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2879                                                 bool enable)
2880 {
2881         uint32_t data = 0;
2882         uint32_t default_data = 0;
2883
2884         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2885         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2886                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2887                              enable ? 1 : 0);
2888         if (default_data != data)
2889                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2890 }
2891
2892 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2893                                         bool enable)
2894 {
2895         uint32_t data = 0;
2896         uint32_t default_data = 0;
2897
2898         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900                              CP_PG_DISABLE,
2901                              enable ? 0 : 1);
2902         if (default_data != data)
2903                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905
2906 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2907                                                 bool enable)
2908 {
2909         uint32_t data, default_data;
2910
2911         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2912         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2913                              GFX_POWER_GATING_ENABLE,
2914                              enable ? 1 : 0);
2915         if (default_data != data)
2916                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2917 }
2918
2919 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2920                                                 bool enable)
2921 {
2922         uint32_t data, default_data;
2923
2924         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2925         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2926                              GFX_PIPELINE_PG_ENABLE,
2927                              enable ? 1 : 0);
2928         if (default_data != data)
2929                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2930
2931         if (!enable)
2932                 /* read any GFX register to wake up GFX */
2933                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2934 }
2935
2936 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2937                                                        bool enable)
2938 {
2939         uint32_t data, default_data;
2940
2941         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2942         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2943                              STATIC_PER_CU_PG_ENABLE,
2944                              enable ? 1 : 0);
2945         if (default_data != data)
2946                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2947 }
2948
2949 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2950                                                 bool enable)
2951 {
2952         uint32_t data, default_data;
2953
2954         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2955         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2956                              DYN_PER_CU_PG_ENABLE,
2957                              enable ? 1 : 0);
2958         if (default_data != data)
2959                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2960 }
2961
2962 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2963 {
2964         gfx_v9_0_init_csb(adev);
2965
2966         /*
2967          * The RLC save/restore list is only usable from RLC v2_1 on,
2968          * and it is required by the gfxoff feature.
2969          */
2970         if (adev->gfx.rlc.is_rlc_v2_1) {
2971                 if (adev->asic_type == CHIP_VEGA12 ||
2972                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2973                         gfx_v9_1_init_rlc_save_restore_list(adev);
2974                 gfx_v9_0_enable_save_restore_machine(adev);
2975         }
2976
2977         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2978                               AMD_PG_SUPPORT_GFX_SMG |
2979                               AMD_PG_SUPPORT_GFX_DMG |
2980                               AMD_PG_SUPPORT_CP |
2981                               AMD_PG_SUPPORT_GDS |
2982                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2983                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2984                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2985                 gfx_v9_0_init_gfx_power_gating(adev);
2986         }
2987 }
2988
2989 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2990 {
2991         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2992         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2993         gfx_v9_0_wait_for_rlc_serdes(adev);
2994 }
2995
2996 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2997 {
2998         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2999         udelay(50);
3000         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3001         udelay(50);
3002 }
3003
3004 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3005 {
3006 #ifdef AMDGPU_RLC_DEBUG_RETRY
3007         u32 rlc_ucode_ver;
3008 #endif
3009
3010         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3011         udelay(50);
3012
3013         /* on APUs such as Carrizo, the CP interrupt is only enabled after CP init */
3014         if (!(adev->flags & AMD_IS_APU)) {
3015                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3016                 udelay(50);
3017         }
3018
3019 #ifdef AMDGPU_RLC_DEBUG_RETRY
3020         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3021         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3022         if (rlc_ucode_ver == 0x108) {
3023                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3024                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3025                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3026                  * default is 0x9C4 to create a 100us interval */
3027                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3028                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3029                  * to disable the page fault retry interrupts, default is
3030                  * 0x100 (256) */
3031                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3032         }
3033 #endif
3034 }
3035
3036 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3037 {
3038         const struct rlc_firmware_header_v2_0 *hdr;
3039         const __le32 *fw_data;
3040         unsigned i, fw_size;
3041
3042         if (!adev->gfx.rlc_fw)
3043                 return -EINVAL;
3044
3045         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3046         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3047
3048         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3049                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3050         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3051
3052         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3053                         RLCG_UCODE_LOADING_START_ADDRESS);
3054         for (i = 0; i < fw_size; i++)
3055                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3056         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3057
3058         return 0;
3059 }
3060
3061 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3062 {
3063         int r;
3064
3065         if (amdgpu_sriov_vf(adev)) {
3066                 gfx_v9_0_init_csb(adev);
3067                 return 0;
3068         }
3069
3070         adev->gfx.rlc.funcs->stop(adev);
3071
3072         /* disable CG */
3073         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3074
3075         gfx_v9_0_init_pg(adev);
3076
3077         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3078                 /* legacy rlc firmware loading */
3079                 r = gfx_v9_0_rlc_load_microcode(adev);
3080                 if (r)
3081                         return r;
3082         }
3083
3084         switch (adev->asic_type) {
3085         case CHIP_RAVEN:
3086                 if (amdgpu_lbpw == 0)
3087                         gfx_v9_0_enable_lbpw(adev, false);
3088                 else
3089                         gfx_v9_0_enable_lbpw(adev, true);
3090                 break;
3091         case CHIP_VEGA20:
3092                 if (amdgpu_lbpw > 0)
3093                         gfx_v9_0_enable_lbpw(adev, true);
3094                 else
3095                         gfx_v9_0_enable_lbpw(adev, false);
3096                 break;
3097         default:
3098                 break;
3099         }
3100
3101         adev->gfx.rlc.funcs->start(adev);
3102
3103         return 0;
3104 }
3105
3106 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3107 {
3108         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3109
3110         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3111         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3112         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3113         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3114         udelay(50);
3115 }
3116
3117 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3118 {
3119         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3120         const struct gfx_firmware_header_v1_0 *ce_hdr;
3121         const struct gfx_firmware_header_v1_0 *me_hdr;
3122         const __le32 *fw_data;
3123         unsigned i, fw_size;
3124
3125         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3126                 return -EINVAL;
3127
3128         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3129                 adev->gfx.pfp_fw->data;
3130         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3131                 adev->gfx.ce_fw->data;
3132         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3133                 adev->gfx.me_fw->data;
3134
3135         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3136         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3137         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3138
3139         gfx_v9_0_cp_gfx_enable(adev, false);
3140
3141         /* PFP */
3142         fw_data = (const __le32 *)
3143                 (adev->gfx.pfp_fw->data +
3144                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3145         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3146         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3147         for (i = 0; i < fw_size; i++)
3148                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3149         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3150
3151         /* CE */
3152         fw_data = (const __le32 *)
3153                 (adev->gfx.ce_fw->data +
3154                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3155         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3156         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3157         for (i = 0; i < fw_size; i++)
3158                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3159         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3160
3161         /* ME */
3162         fw_data = (const __le32 *)
3163                 (adev->gfx.me_fw->data +
3164                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3165         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3166         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3167         for (i = 0; i < fw_size; i++)
3168                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3169         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3170
3171         return 0;
3172 }
3173
3174 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3175 {
3176         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3177         const struct cs_section_def *sect = NULL;
3178         const struct cs_extent_def *ext = NULL;
3179         int r, i, tmp;
3180
3181         /* init the CP */
3182         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3183         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3184
3185         gfx_v9_0_cp_gfx_enable(adev, true);
3186
3187         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3188         if (r) {
3189                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3190                 return r;
3191         }
3192
3193         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3194         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3195
3196         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3197         amdgpu_ring_write(ring, 0x80000000);
3198         amdgpu_ring_write(ring, 0x80000000);
3199
3200         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3201                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3202                         if (sect->id == SECT_CONTEXT) {
3203                                 amdgpu_ring_write(ring,
3204                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3205                                                ext->reg_count));
3206                                 amdgpu_ring_write(ring,
3207                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3208                                 for (i = 0; i < ext->reg_count; i++)
3209                                         amdgpu_ring_write(ring, ext->extent[i]);
3210                         }
3211                 }
3212         }
3213
3214         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3215         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3216
3217         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3218         amdgpu_ring_write(ring, 0);
3219
3220         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3221         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3222         amdgpu_ring_write(ring, 0x8000);
3223         amdgpu_ring_write(ring, 0x8000);
3224
3225         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3226         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3227                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3228         amdgpu_ring_write(ring, tmp);
3229         amdgpu_ring_write(ring, 0);
3230
3231         amdgpu_ring_commit(ring);
3232
3233         return 0;
3234 }
3235
3236 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3237 {
3238         struct amdgpu_ring *ring;
3239         u32 tmp;
3240         u32 rb_bufsz;
3241         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3242
3243         /* Set the write pointer delay */
3244         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3245
3246         /* set the RB to use vmid 0 */
3247         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3248
3249         /* Set ring buffer size */
3250         ring = &adev->gfx.gfx_ring[0];
3251         rb_bufsz = order_base_2(ring->ring_size / 8);
3252         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3253         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3254 #ifdef __BIG_ENDIAN
3255         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3256 #endif
3257         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3258
3259         /* Initialize the ring buffer's write pointers */
3260         ring->wptr = 0;
3261         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3262         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3263
3264         /* set the wb address whether it's enabled or not */
3265         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3266         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3267         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3268
3269         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3270         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3271         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3272
3273         mdelay(1);
3274         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3275
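             /* the ring buffer base is programmed in 256-byte units */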
3276         rb_addr = ring->gpu_addr >> 8;
3277         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3278         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3279
3280         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3281         if (ring->use_doorbell) {
3282                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3283                                     DOORBELL_OFFSET, ring->doorbell_index);
3284                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3285                                     DOORBELL_EN, 1);
3286         } else {
3287                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3288         }
3289         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3290
3291         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3292                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3293         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3294
3295         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3296                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3297
3299         /* start the ring */
3300         gfx_v9_0_cp_gfx_start(adev);
3301         ring->sched.ready = true;
3302
3303         return 0;
3304 }
3305
3306 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3307 {
3308         if (enable) {
3309                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3310         } else {
3311                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3312                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3313                 adev->gfx.kiq.ring.sched.ready = false;
3314         }
3315         udelay(50);
3316 }
3317
3318 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3319 {
3320         const struct gfx_firmware_header_v1_0 *mec_hdr;
3321         const __le32 *fw_data;
3322         unsigned i;
3323         u32 tmp;
3324
3325         if (!adev->gfx.mec_fw)
3326                 return -EINVAL;
3327
3328         gfx_v9_0_cp_compute_enable(adev, false);
3329
3330         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3331         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3332
3333         fw_data = (const __le32 *)
3334                 (adev->gfx.mec_fw->data +
3335                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3336         tmp = 0;
3337         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3338         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3339         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3340
3341         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3342                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3343         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3344                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3345
3346         /* MEC1 */
3347         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3348                          mec_hdr->jt_offset);
3349         for (i = 0; i < mec_hdr->jt_size; i++)
3350                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3351                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3352
3353         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3354                         adev->gfx.mec_fw_version);
3355         /* TODO: loading MEC2 firmware is only necessary if MEC2 is to run microcode different from MEC1 */
3356
3357         return 0;
3358 }
3359
3360 /* KIQ functions */
3361 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3362 {
3363         uint32_t tmp;
3364         struct amdgpu_device *adev = ring->adev;
3365
3366         /* tell RLC which is KIQ queue */
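             /* the queue selection is written first; bit 0x80
              * (presumably the enable bit) is set with a second write
              */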
3367         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3368         tmp &= 0xffffff00;
3369         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3370         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3371         tmp |= 0x80;
3372         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3373 }
3374
3375 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3376 {
3377         struct amdgpu_device *adev = ring->adev;
3378
3379         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3380                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3381                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3382                         mqd->cp_hqd_queue_priority =
3383                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3384                 }
3385         }
3386 }
3387
3388 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3389 {
3390         struct amdgpu_device *adev = ring->adev;
3391         struct v9_mqd *mqd = ring->mqd_ptr;
3392         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3393         uint32_t tmp;
3394
3395         mqd->header = 0xC0310800;
3396         mqd->compute_pipelinestat_enable = 0x00000001;
3397         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3398         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3399         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3400         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3401         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3402         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3403         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3404         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3405         mqd->compute_misc_reserved = 0x00000003;
3406
3407         mqd->dynamic_cu_mask_addr_lo =
3408                 lower_32_bits(ring->mqd_gpu_addr
3409                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3410         mqd->dynamic_cu_mask_addr_hi =
3411                 upper_32_bits(ring->mqd_gpu_addr
3412                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3413
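             /* the EOP base address registers take a 256-byte aligned
              * address, hence the >> 8
              */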
3414         eop_base_addr = ring->eop_gpu_addr >> 8;
3415         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3416         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3417
3418         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3419         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3420         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3421                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3422
3423         mqd->cp_hqd_eop_control = tmp;
3424
3425         /* enable doorbell? */
3426         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3427
3428         if (ring->use_doorbell) {
3429                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3430                                     DOORBELL_OFFSET, ring->doorbell_index);
3431                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3432                                     DOORBELL_EN, 1);
3433                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3434                                     DOORBELL_SOURCE, 0);
3435                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3436                                     DOORBELL_HIT, 0);
3437         } else {
3438                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3439                                          DOORBELL_EN, 0);
3440         }
3441
3442         mqd->cp_hqd_pq_doorbell_control = tmp;
3443
3444         /* disable the queue if it's active */
3445         ring->wptr = 0;
3446         mqd->cp_hqd_dequeue_request = 0;
3447         mqd->cp_hqd_pq_rptr = 0;
3448         mqd->cp_hqd_pq_wptr_lo = 0;
3449         mqd->cp_hqd_pq_wptr_hi = 0;
3450
3451         /* set the pointer to the MQD */
3452         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3453         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3454
3455         /* set MQD vmid to 0 */
3456         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3457         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3458         mqd->cp_mqd_control = tmp;
3459
3460         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3461         hqd_gpu_addr = ring->gpu_addr >> 8;
3462         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3463         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3464
3465         /* set up the HQD, this is similar to CP_RB0_CNTL */
3466         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3467         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3468                             (order_base_2(ring->ring_size / 4) - 1));
3469         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3470                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3471 #ifdef __BIG_ENDIAN
3472         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3473 #endif
3474         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3475         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3476         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3477         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3478         mqd->cp_hqd_pq_control = tmp;
3479
3480         /* set the wb address whether it's enabled or not */
3481         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3482         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3483         mqd->cp_hqd_pq_rptr_report_addr_hi =
3484                 upper_32_bits(wb_gpu_addr) & 0xffff;
3485
3486         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3487         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3488         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3489         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3490
3491         tmp = 0;
3492         /* enable the doorbell if requested */
3493         if (ring->use_doorbell) {
3494                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3495                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3496                                 DOORBELL_OFFSET, ring->doorbell_index);
3497
3498                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3499                                          DOORBELL_EN, 1);
3500                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3501                                          DOORBELL_SOURCE, 0);
3502                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3503                                          DOORBELL_HIT, 0);
3504         }
3505
3506         mqd->cp_hqd_pq_doorbell_control = tmp;
3507
3508         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3509         ring->wptr = 0;
3510         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3511
3512         /* set the vmid for the queue */
3513         mqd->cp_hqd_vmid = 0;
3514
3515         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3516         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3517         mqd->cp_hqd_persistent_state = tmp;
3518
3519         /* set MIN_IB_AVAIL_SIZE */
3520         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3521         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3522         mqd->cp_hqd_ib_control = tmp;
3523
3524         /* set static priority for a queue/ring */
3525         gfx_v9_0_mqd_set_priority(ring, mqd);
3526         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3527
3528         /* the map_queues packet doesn't need to activate the queue,
3529          * so only the KIQ needs to set this field.
3530          */
3531         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3532                 mqd->cp_hqd_active = 1;
3533
3534         return 0;
3535 }
3536
3537 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3538 {
3539         struct amdgpu_device *adev = ring->adev;
3540         struct v9_mqd *mqd = ring->mqd_ptr;
3541         int j;
3542
3543         /* disable wptr polling */
3544         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3545
3546         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3547                mqd->cp_hqd_eop_base_addr_lo);
3548         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3549                mqd->cp_hqd_eop_base_addr_hi);
3550
3551         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3552         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3553                mqd->cp_hqd_eop_control);
3554
3555         /* enable doorbell? */
3556         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3557                mqd->cp_hqd_pq_doorbell_control);
3558
3559         /* disable the queue if it's active */
3560         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3561                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3562                 for (j = 0; j < adev->usec_timeout; j++) {
3563                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3564                                 break;
3565                         udelay(1);
3566                 }
3567                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3568                        mqd->cp_hqd_dequeue_request);
3569                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3570                        mqd->cp_hqd_pq_rptr);
3571                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572                        mqd->cp_hqd_pq_wptr_lo);
3573                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574                        mqd->cp_hqd_pq_wptr_hi);
3575         }
3576
3577         /* set the pointer to the MQD */
3578         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3579                mqd->cp_mqd_base_addr_lo);
3580         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3581                mqd->cp_mqd_base_addr_hi);
3582
3583         /* set MQD vmid to 0 */
3584         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3585                mqd->cp_mqd_control);
3586
3587         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3588         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3589                mqd->cp_hqd_pq_base_lo);
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3591                mqd->cp_hqd_pq_base_hi);
3592
3593         /* set up the HQD, this is similar to CP_RB0_CNTL */
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3595                mqd->cp_hqd_pq_control);
3596
3597         /* set the wb address whether it's enabled or not */
3598         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3599                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3600         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3601                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3602
3603         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3604         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3605                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3606         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3607                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3608
3609         /* enable the doorbell if requested */
3610         if (ring->use_doorbell) {
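                     /* doorbell_index values count 64-bit doorbell slots, so "* 2"
                      * gives the dword index and "<< 2" the byte offset these
                      * range registers expect
                      */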
3611                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3612                                         (adev->doorbell_index.kiq * 2) << 2);
3613                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3614                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3615         }
3616
3617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3618                mqd->cp_hqd_pq_doorbell_control);
3619
3620         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3621         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3622                mqd->cp_hqd_pq_wptr_lo);
3623         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3624                mqd->cp_hqd_pq_wptr_hi);
3625
3626         /* set the vmid for the queue */
3627         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3628
3629         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3630                mqd->cp_hqd_persistent_state);
3631
3632         /* activate the queue */
3633         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3634                mqd->cp_hqd_active);
3635
3636         if (ring->use_doorbell)
3637                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3638
3639         return 0;
3640 }
3641
3642 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3643 {
3644         struct amdgpu_device *adev = ring->adev;
3645         int j;
3646
3647         /* disable the queue if it's active */
3648         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3649
3650                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3651
3652                 for (j = 0; j < adev->usec_timeout; j++) {
3653                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3654                                 break;
3655                         udelay(1);
3656                 }
3657
3658                 if (j == adev->usec_timeout) {
3659                         DRM_DEBUG("KIQ dequeue request failed.\n");
3660
3661                         /* Manual disable if dequeue request times out */
3662                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3663                 }
3664
3665                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3666                       0);
3667         }
3668
3669         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3670         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3671         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
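             /* pulse DOORBELL_EN (bit 30) high, then clear the whole doorbell
              * control state
              */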
3672         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3673         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3674         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3675         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3676         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3677
3678         return 0;
3679 }
3680
3681 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3682 {
3683         struct amdgpu_device *adev = ring->adev;
3684         struct v9_mqd *mqd = ring->mqd_ptr;
3685         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3686
3687         gfx_v9_0_kiq_setting(ring);
3688
3689         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3690                 /* reset MQD to a clean status */
3691                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3692                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3693
3694                 /* reset ring buffer */
3695                 ring->wptr = 0;
3696                 amdgpu_ring_clear_ring(ring);
3697
3698                 mutex_lock(&adev->srbm_mutex);
3699                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3700                 gfx_v9_0_kiq_init_register(ring);
3701                 soc15_grbm_select(adev, 0, 0, 0, 0);
3702                 mutex_unlock(&adev->srbm_mutex);
3703         } else {
3704                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3705                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3706                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
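                     /* make all CUs and RBs available to this queue by default */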
3707                 mutex_lock(&adev->srbm_mutex);
3708                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3709                 gfx_v9_0_mqd_init(ring);
3710                 gfx_v9_0_kiq_init_register(ring);
3711                 soc15_grbm_select(adev, 0, 0, 0, 0);
3712                 mutex_unlock(&adev->srbm_mutex);
3713
3714                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3715                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3716         }
3717
3718         return 0;
3719 }
3720
3721 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3722 {
3723         struct amdgpu_device *adev = ring->adev;
3724         struct v9_mqd *mqd = ring->mqd_ptr;
3725         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3726
3727         if (!adev->in_gpu_reset && !adev->in_suspend) {
3728                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3729                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3730                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3731                 mutex_lock(&adev->srbm_mutex);
3732                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3733                 gfx_v9_0_mqd_init(ring);
3734                 soc15_grbm_select(adev, 0, 0, 0, 0);
3735                 mutex_unlock(&adev->srbm_mutex);
3736
3737                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3738                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3739         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3740                 /* reset MQD to a clean status */
3741                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3742                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3743
3744                 /* reset ring buffer */
3745                 ring->wptr = 0;
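                     /* also clear the wptr shadow in the writeback buffer */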
3746                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3747                 amdgpu_ring_clear_ring(ring);
3748         } else {
3749                 amdgpu_ring_clear_ring(ring);
3750         }
3751
3752         return 0;
3753 }
3754
3755 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3756 {
3757         struct amdgpu_ring *ring;
3758         int r;
3759
3760         ring = &adev->gfx.kiq.ring;
3761
3762         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3763         if (unlikely(r != 0))
3764                 return r;
3765
3766         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3767         if (unlikely(r != 0)) {
                     /* drop the reservation taken above before bailing out */
                     amdgpu_bo_unreserve(ring->mqd_obj);
3768                 return r;
             }
3769
3770         gfx_v9_0_kiq_init_queue(ring);
3771         amdgpu_bo_kunmap(ring->mqd_obj);
3772         ring->mqd_ptr = NULL;
3773         amdgpu_bo_unreserve(ring->mqd_obj);
3774         ring->sched.ready = true;
3775         return 0;
3776 }
3777
3778 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3779 {
3780         struct amdgpu_ring *ring = NULL;
3781         int r = 0, i;
3782
3783         gfx_v9_0_cp_compute_enable(adev, true);
3784
3785         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3786                 ring = &adev->gfx.compute_ring[i];
3787
3788                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3789                 if (unlikely(r != 0))
3790                         goto done;
3791                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3792                 if (!r) {
3793                         r = gfx_v9_0_kcq_init_queue(ring);
3794                         amdgpu_bo_kunmap(ring->mqd_obj);
3795                         ring->mqd_ptr = NULL;
3796                 }
3797                 amdgpu_bo_unreserve(ring->mqd_obj);
3798                 if (r)
3799                         goto done;
3800         }
3801
3802         r = amdgpu_gfx_enable_kcq(adev);
3803 done:
3804         return r;
3805 }
3806
3807 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3808 {
3809         int r, i;
3810         struct amdgpu_ring *ring;
3811
3812         if (!(adev->flags & AMD_IS_APU))
3813                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3814
3815         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3816                 if (adev->asic_type != CHIP_ARCTURUS) {
3817                         /* legacy firmware loading */
3818                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3819                         if (r)
3820                                 return r;
3821                 }
3822
3823                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3824                 if (r)
3825                         return r;
3826         }
3827
3828         r = gfx_v9_0_kiq_resume(adev);
3829         if (r)
3830                 return r;
3831
3832         if (adev->asic_type != CHIP_ARCTURUS) {
3833                 r = gfx_v9_0_cp_gfx_resume(adev);
3834                 if (r)
3835                         return r;
3836         }
3837
3838         r = gfx_v9_0_kcq_resume(adev);
3839         if (r)
3840                 return r;
3841
3842         if (adev->asic_type != CHIP_ARCTURUS) {
3843                 ring = &adev->gfx.gfx_ring[0];
3844                 r = amdgpu_ring_test_helper(ring);
3845                 if (r)
3846                         return r;
3847         }
3848
3849         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3850                 ring = &adev->gfx.compute_ring[i];
3851                 amdgpu_ring_test_helper(ring);
3852         }
3853
3854         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3855
3856         return 0;
3857 }
3858
3859 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3860 {
3861         u32 tmp;
3862
3863         if (adev->asic_type != CHIP_ARCTURUS)
3864                 return;
3865
3866         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3867         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3868                                 adev->df.hash_status.hash_64k);
3869         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3870                                 adev->df.hash_status.hash_2m);
3871         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3872                                 adev->df.hash_status.hash_1g);
3873         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3874 }
3875
3876 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3877 {
3878         if (adev->asic_type != CHIP_ARCTURUS)
3879                 gfx_v9_0_cp_gfx_enable(adev, enable);
3880         gfx_v9_0_cp_compute_enable(adev, enable);
3881 }
3882
3883 static int gfx_v9_0_hw_init(void *handle)
3884 {
3885         int r;
3886         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3887
3888         if (!amdgpu_sriov_vf(adev))
3889                 gfx_v9_0_init_golden_registers(adev);
3890
3891         gfx_v9_0_constants_init(adev);
3892
3893         gfx_v9_0_init_tcp_config(adev);
3894
3895         r = adev->gfx.rlc.funcs->resume(adev);
3896         if (r)
3897                 return r;
3898
3899         r = gfx_v9_0_cp_resume(adev);
3900         if (r)
3901                 return r;
3902
3903         return r;
3904 }
3905
3906 static int gfx_v9_0_hw_fini(void *handle)
3907 {
3908         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3909
3910         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3911         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3912         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3913
3914         /* DF freeze and KCQ disable will fail once a RAS interrupt has triggered */
3915         if (!amdgpu_ras_intr_triggered())
3916                 /* disable KCQ to avoid the CPC touching memory that is no longer valid */
3917                 amdgpu_gfx_disable_kcq(adev);
3918
3919         if (amdgpu_sriov_vf(adev)) {
3920                 gfx_v9_0_cp_gfx_enable(adev, false);
3921                 /* polling must be disabled for SRIOV once the hw has finished;
3922                  * otherwise the CPC engine may keep fetching the WB address,
3923                  * which is no longer valid after the sw side has finished,
3924                  * and trigger DMAR read errors on the hypervisor side.
3925                  */
3926                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3927                 return 0;
3928         }
3929
3930         /* Use the deinitialize sequence from CAIL when unbinding the device
3931          * from the driver; otherwise the KIQ hangs when binding it back.
3932          */
3933         if (!adev->in_gpu_reset && !adev->in_suspend) {
3934                 mutex_lock(&adev->srbm_mutex);
3935                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3936                                 adev->gfx.kiq.ring.pipe,
3937                                 adev->gfx.kiq.ring.queue, 0);
3938                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3939                 soc15_grbm_select(adev, 0, 0, 0, 0);
3940                 mutex_unlock(&adev->srbm_mutex);
3941         }
3942
3943         gfx_v9_0_cp_enable(adev, false);
3944         adev->gfx.rlc.funcs->stop(adev);
3945
3946         return 0;
3947 }
3948
3949 static int gfx_v9_0_suspend(void *handle)
3950 {
3951         return gfx_v9_0_hw_fini(handle);
3952 }
3953
3954 static int gfx_v9_0_resume(void *handle)
3955 {
3956         return gfx_v9_0_hw_init(handle);
3957 }
3958
3959 static bool gfx_v9_0_is_idle(void *handle)
3960 {
3961         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3962
3963         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3964                                 GRBM_STATUS, GUI_ACTIVE))
3965                 return false;
3966         else
3967                 return true;
3968 }
3969
3970 static int gfx_v9_0_wait_for_idle(void *handle)
3971 {
3972         unsigned i;
3973         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3974
3975         for (i = 0; i < adev->usec_timeout; i++) {
3976                 if (gfx_v9_0_is_idle(handle))
3977                         return 0;
3978                 udelay(1);
3979         }
3980         return -ETIMEDOUT;
3981 }
3982
3983 static int gfx_v9_0_soft_reset(void *handle)
3984 {
3985         u32 grbm_soft_reset = 0;
3986         u32 tmp;
3987         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3988
3989         /* GRBM_STATUS */
3990         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3991         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3992                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3993                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3994                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3995                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3996                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3997                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3998                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3999                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4000                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4001         }
4002
4003         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4004                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4005                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4006         }
4007
4008         /* GRBM_STATUS2 */
4009         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4010         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4011                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4012                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4013
4014
4015         if (grbm_soft_reset) {
4016                 /* stop the rlc */
4017                 adev->gfx.rlc.funcs->stop(adev);
4018
4019                 if (adev->asic_type != CHIP_ARCTURUS)
4020                         /* Disable GFX parsing/prefetching */
4021                         gfx_v9_0_cp_gfx_enable(adev, false);
4022
4023                 /* Disable MEC parsing/prefetching */
4024                 gfx_v9_0_cp_compute_enable(adev, false);
4025
4026                 if (grbm_soft_reset) {
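                             /* assert the selected reset bits, read back to post
                              * the write, hold for ~50us, then release them
                              */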
4027                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4028                         tmp |= grbm_soft_reset;
4029                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4030                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4031                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4032
4033                         udelay(50);
4034
4035                         tmp &= ~grbm_soft_reset;
4036                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4037                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4038                 }
4039
4040                 /* Wait a little for things to settle down */
4041                 udelay(50);
4042         }
4043         return 0;
4044 }
4045
4046 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4047 {
4048         signed long r, cnt = 0;
4049         unsigned long flags;
4050         uint32_t seq, reg_val_offs = 0;
4051         uint64_t value = 0;
4052         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4053         struct amdgpu_ring *ring = &kiq->ring;
4054
4055         BUG_ON(!ring->funcs->emit_rreg);
4056
4057         spin_lock_irqsave(&kiq->ring_lock, flags);
4058         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4059                 pr_err("critical bug! too many kiq readers\n");
4060                 goto failed_unlock;
4061         }
4062         amdgpu_ring_alloc(ring, 32);
4063         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4064         amdgpu_ring_write(ring, 9 |     /* src: GPU clock count */
4065                                 (5 << 8) |      /* dst: memory */
4066                                 (1 << 16) |     /* count sel */
4067                                 (1 << 20));     /* write confirm */
4068         amdgpu_ring_write(ring, 0);
4069         amdgpu_ring_write(ring, 0);
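             /* the two source-address dwords are unused for the clock count source */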
4070         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4071                                 reg_val_offs * 4));
4072         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4073                                 reg_val_offs * 4));
4074         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4075         if (r)
4076                 goto failed_undo;
4077
4078         amdgpu_ring_commit(ring);
4079         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4080
4081         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4082
4083         /* don't keep waiting in the GPU-reset case, because that can block
4084          * the gpu_recover() routine forever: e.g. if this virt kiq read is
4085          * triggered from TTM, then ttm_bo_lock_delayed_workqueue() will
4086          * never return while we keep waiting here, and gpu_recover() would
4087          * hang as a result.
4088          *
4089          * also don't keep waiting when called from IRQ context
4090          */
4091         if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4092                 goto failed_kiq_read;
4093
4094         might_sleep();
4095         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4096                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4097                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4098         }
4099
4100         if (cnt > MAX_KIQ_REG_TRY)
4101                 goto failed_kiq_read;
4102
4103         mb();
4104         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4105                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4106         amdgpu_device_wb_free(adev, reg_val_offs);
4107         return value;
4108
4109 failed_undo:
4110         amdgpu_ring_undo(ring);
4111 failed_unlock:
4112         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4113 failed_kiq_read:
4114         if (reg_val_offs)
4115                 amdgpu_device_wb_free(adev, reg_val_offs);
4116         pr_err("failed to read gpu clock\n");
4117         return ~0;
4118 }
4119
4120 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4121 {
4122         uint64_t clock;
4123
4124         amdgpu_gfx_off_ctrl(adev, false);
4125         mutex_lock(&adev->gfx.gpu_clock_mutex);
4126         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4127                 clock = gfx_v9_0_kiq_read_clock(adev);
4128         } else {
4129                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4130                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4131                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4132         }
4133         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4134         amdgpu_gfx_off_ctrl(adev, true);
4135         return clock;
4136 }
4137
4138 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4139                                           uint32_t vmid,
4140                                           uint32_t gds_base, uint32_t gds_size,
4141                                           uint32_t gws_base, uint32_t gws_size,
4142                                           uint32_t oa_base, uint32_t oa_size)
4143 {
4144         struct amdgpu_device *adev = ring->adev;
4145
4146         /* GDS Base */
4147         gfx_v9_0_write_data_to_reg(ring, 0, false,
4148                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4149                                    gds_base);
4150
4151         /* GDS Size */
4152         gfx_v9_0_write_data_to_reg(ring, 0, false,
4153                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4154                                    gds_size);
4155
4156         /* GWS */
4157         gfx_v9_0_write_data_to_reg(ring, 0, false,
4158                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4159                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4160
4161         /* OA */
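             /* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask of
              * oa_size consecutive OA bits starting at oa_base, e.g. base 4,
              * size 2 -> 0x30
              */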
4162         gfx_v9_0_write_data_to_reg(ring, 0, false,
4163                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4164                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4165 }
4166
4167 static const u32 vgpr_init_compute_shader[] =
4168 {
4169         0xb07c0000, 0xbe8000ff,
4170         0x000000f8, 0xbf110800,
4171         0x7e000280, 0x7e020280,
4172         0x7e040280, 0x7e060280,
4173         0x7e080280, 0x7e0a0280,
4174         0x7e0c0280, 0x7e0e0280,
4175         0x80808800, 0xbe803200,
4176         0xbf84fff5, 0xbf9c0000,
4177         0xd28c0001, 0x0001007f,
4178         0xd28d0001, 0x0002027e,
4179         0x10020288, 0xb8810904,
4180         0xb7814000, 0xd1196a01,
4181         0x00000301, 0xbe800087,
4182         0xbefc00c1, 0xd89c4000,
4183         0x00020201, 0xd89cc080,
4184         0x00040401, 0x320202ff,
4185         0x00000800, 0x80808100,
4186         0xbf84fff8, 0x7e020280,
4187         0xbf810000, 0x00000000,
4188 };
4189
4190 static const u32 sgpr_init_compute_shader[] =
4191 {
4192         0xb07c0000, 0xbe8000ff,
4193         0x0000005f, 0xbee50080,
4194         0xbe812c65, 0xbe822c65,
4195         0xbe832c65, 0xbe842c65,
4196         0xbe852c65, 0xb77c0005,
4197         0x80808500, 0xbf84fff8,
4198         0xbe800080, 0xbf810000,
4199 };
4200
4201 static const u32 vgpr_init_compute_shader_arcturus[] = {
4202         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4203         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4204         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4205         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4206         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4207         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4208         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4209         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4210         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4211         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4212         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4213         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4214         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4215         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4216         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4217         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4218         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4219         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4220         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4221         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4222         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4223         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4224         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4225         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4226         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4227         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4228         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4229         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4230         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4231         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4232         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4233         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4234         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4235         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4236         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4237         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4238         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4239         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4240         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4241         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4242         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4243         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4244         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4245         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4246         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4247         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4248         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4249         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4250         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4251         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4252         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4253         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4254         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4255         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4256         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4257         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4258         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4259         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4260         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4261         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4262         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4263         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4264         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4265         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4266         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4267         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4268         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4269         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4270         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4271         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4272         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4273         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4274         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4275         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4276         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4277         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4278         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4279         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4280         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4281         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4282         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4283         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4284         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4285         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4286         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4287         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4288         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4289         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4290         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4291         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4292         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4293         0xbf84fff8, 0xbf810000,
4294 };
4295
4296 /* When the register arrays below are changed, please update gpr_reg_size
4297   and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4298   to cover all gfx9 ASICs */
4299 static const struct soc15_reg_entry vgpr_init_regs[] = {
4300    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4301    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4302    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4303    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4304    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4305    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4306    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4307    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4308    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4310    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4311    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4312    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4313    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4314 };
4315
4316 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4320    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4321    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4322    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4323    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4324    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4325    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4327    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4328    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4329    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4330    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4331 };
4332
4333 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4334    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4335    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4336    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4337    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4338    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4339    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4340    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4341    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4342    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4343    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4344    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4345    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4346    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4347    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4348 };
4349
4350 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4351    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4352    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4353    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4354    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4364    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4365 };
4366
4367 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4368    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4369    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4370    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4371    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4372    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4373    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4374    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4375    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4376    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4377    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4378    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4379    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4380    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4381    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4382    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4383    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4384    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4385    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4386    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4387    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4388    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4389    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4390    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4391    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4392    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4393    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4394    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4395    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4396    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4397    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4398    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4399    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4400    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4401 };
4402
4403 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4404 {
4405         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4406         int i, r;
4407
4408         /* only supported when RAS is enabled */
4409         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4410                 return 0;
4411
4412         r = amdgpu_ring_alloc(ring, 7);
4413         if (r) {
4414                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4415                         ring->name, r);
4416                 return r;
4417         }
4418
4419         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4420         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4421
4422         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4423         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4424                                 PACKET3_DMA_DATA_DST_SEL(1) |
4425                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4426                                 PACKET3_DMA_DATA_ENGINE(0)));
4427         amdgpu_ring_write(ring, 0);
4428         amdgpu_ring_write(ring, 0);
4429         amdgpu_ring_write(ring, 0);
4430         amdgpu_ring_write(ring, 0);
4431         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4432                                 adev->gds.gds_size);
4433
4434         amdgpu_ring_commit(ring);
4435
4436         for (i = 0; i < adev->usec_timeout; i++) {
4437                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4438                         break;
4439                 udelay(1);
4440         }
4441
4442         if (i >= adev->usec_timeout)
4443                 r = -ETIMEDOUT;
4444
4445         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4446
4447         return r;
4448 }
4449
4450 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4451 {
4452         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4453         struct amdgpu_ib ib;
4454         struct dma_fence *f = NULL;
4455         int r, i;
4456         unsigned total_size, vgpr_offset, sgpr_offset;
4457         u64 gpu_addr;
4458
4459         int compute_dim_x = adev->gfx.config.max_shader_engines *
4460                                                 adev->gfx.config.max_cu_per_sh *
4461                                                 adev->gfx.config.max_sh_per_se;
4462         int sgpr_work_group_size = 5;
4463         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
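             /* 6 leading entries plus one STATIC_THREAD_MGMT_SE register per shader engine */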
4464         int vgpr_init_shader_size;
4465         const u32 *vgpr_init_shader_ptr;
4466         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4467
4468         /* only supported when RAS is enabled */
4469         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4470                 return 0;
4471
4472         /* bail if the compute ring is not ready */
4473         if (!ring->sched.ready)
4474                 return 0;
4475
4476         if (adev->asic_type == CHIP_ARCTURUS) {
4477                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4478                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4479                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4480         } else {
4481                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4482                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4483                 vgpr_init_regs_ptr = vgpr_init_regs;
4484         }
4485
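             /* each dispatch costs, in dwords: 3 per SET_SH_REG register write
              * (header, offset, value), plus 4 for the COMPUTE_PGM_LO/HI packet,
              * 5 for DISPATCH_DIRECT and 2 for the CS partial flush event; the
              * "* 4" converts dwords to bytes
              */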
4486         total_size =
4487                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4488         total_size +=
4489                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4490         total_size +=
4491                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
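             /* COMPUTE_PGM_LO/HI take the shader address >> 8, so keep each shader blob 256-byte aligned */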
4492         total_size = ALIGN(total_size, 256);
4493         vgpr_offset = total_size;
4494         total_size += ALIGN(vgpr_init_shader_size, 256);
4495         sgpr_offset = total_size;
4496         total_size += sizeof(sgpr_init_compute_shader);
4497
4498         /* allocate an indirect buffer to put the commands in */
4499         memset(&ib, 0, sizeof(ib));
4500         r = amdgpu_ib_get(adev, NULL, total_size,
4501                                         AMDGPU_IB_POOL_DIRECT, &ib);
4502         if (r) {
4503                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4504                 return r;
4505         }
4506
4507         /* load the compute shaders */
4508         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4509                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4510
4511         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4512                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4513
4514         /* init the ib length to 0 */
4515         ib.length_dw = 0;
4516
4517         /* VGPR */
4518         /* write the register state for the compute dispatch */
4519         for (i = 0; i < gpr_reg_size; i++) {
4520                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4521                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4522                                                                 - PACKET3_SET_SH_REG_START;
4523                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4524         }
4525         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4526         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4527         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4528         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4529                                                         - PACKET3_SET_SH_REG_START;
4530         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4531         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4532
4533         /* write dispatch packet */
4534         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4535         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4536         ib.ptr[ib.length_dw++] = 1; /* y */
4537         ib.ptr[ib.length_dw++] = 1; /* z */
4538         ib.ptr[ib.length_dw++] =
4539                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4540
4541         /* write CS partial flush packet */
4542         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4543         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4544
4545         /* SGPR1 */
4546         /* write the register state for the compute dispatch */
4547         for (i = 0; i < gpr_reg_size; i++) {
4548                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4549                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4550                                                                 - PACKET3_SET_SH_REG_START;
4551                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4552         }
4553         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4554         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4555         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4556         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4557                                                         - PACKET3_SET_SH_REG_START;
4558         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4559         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4560
4561         /* write dispatch packet */
4562         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4563         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4564         ib.ptr[ib.length_dw++] = 1; /* y */
4565         ib.ptr[ib.length_dw++] = 1; /* z */
4566         ib.ptr[ib.length_dw++] =
4567                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4568
4569         /* write CS partial flush packet */
4570         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4571         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4572
4573         /* SGPR2 */
4574         /* write the register state for the compute dispatch */
4575         for (i = 0; i < gpr_reg_size; i++) {
4576                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4577                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4578                                                                 - PACKET3_SET_SH_REG_START;
4579                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4580         }
4581         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4582         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4583         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4584         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4585                                                         - PACKET3_SET_SH_REG_START;
4586         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4587         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4588
4589         /* write dispatch packet */
4590         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4591         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4592         ib.ptr[ib.length_dw++] = 1; /* y */
4593         ib.ptr[ib.length_dw++] = 1; /* z */
4594         ib.ptr[ib.length_dw++] =
4595                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4596
4597         /* write CS partial flush packet */
4598         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4599         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4600
4601         /* schedule the ib on the ring */
4602         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4603         if (r) {
4604                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4605                 goto fail;
4606         }
4607
4608         /* wait for the GPU to finish processing the IB */
4609         r = dma_fence_wait(f, false);
4610         if (r) {
4611                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4612                 goto fail;
4613         }
4614
4615 fail:
4616         amdgpu_ib_free(adev, &ib, NULL);
4617         dma_fence_put(f);
4618
4619         return r;
4620 }
4621
4622 static int gfx_v9_0_early_init(void *handle)
4623 {
4624         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4625
4626         if (adev->asic_type == CHIP_ARCTURUS)
4627                 adev->gfx.num_gfx_rings = 0;
4628         else
4629                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4630         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4631         gfx_v9_0_set_kiq_pm4_funcs(adev);
4632         gfx_v9_0_set_ring_funcs(adev);
4633         gfx_v9_0_set_irq_funcs(adev);
4634         gfx_v9_0_set_gds_init(adev);
4635         gfx_v9_0_set_rlc_funcs(adev);
4636
4637         return 0;
4638 }
4639
4640 static int gfx_v9_0_ecc_late_init(void *handle)
4641 {
4642         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4643         int r;
4644
4645         /*
4646          * Temporary workaround: on several cards the CP firmware fails to
4647          * update the read pointer when CPDMA writes the GDS clearing
4648          * operation during the suspend/resume sequence. So just limit this
4649          * operation to the cold-boot sequence.
4650          */
4651         if (!adev->in_suspend) {
4652                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4653                 if (r)
4654                         return r;
4655         }
4656
4657         /* requires IBs so do in late init after IB pool is initialized */
4658         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4659         if (r)
4660                 return r;
4661
4662         if (adev->gfx.funcs &&
4663             adev->gfx.funcs->reset_ras_error_count)
4664                 adev->gfx.funcs->reset_ras_error_count(adev);
4665
4666         r = amdgpu_gfx_ras_late_init(adev);
4667         if (r)
4668                 return r;
4669
4670         return 0;
4671 }
4672
4673 static int gfx_v9_0_late_init(void *handle)
4674 {
4675         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4676         int r;
4677
4678         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4679         if (r)
4680                 return r;
4681
4682         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4683         if (r)
4684                 return r;
4685
4686         r = gfx_v9_0_ecc_late_init(handle);
4687         if (r)
4688                 return r;
4689
4690         return 0;
4691 }
4692
4693 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4694 {
4695         uint32_t rlc_setting;
4696
4697         /* if RLC is not enabled, report it as disabled */
4698         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4699         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4700                 return false;
4701
4702         return true;
4703 }
4704
4705 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4706 {
4707         uint32_t data;
4708         unsigned i;
4709
4710         data = RLC_SAFE_MODE__CMD_MASK;
4711         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4712         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4713
4714         /* wait for RLC_SAFE_MODE */
4715         for (i = 0; i < adev->usec_timeout; i++) {
4716                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4717                         break;
4718                 udelay(1);
4719         }
4720 }
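/*
 * Editor's note: a minimal, self-contained sketch of the mask/shift idiom
 * that the REG_SET_FIELD()/REG_GET_FIELD() helpers above expand to. The
 * DEMO_* names are hypothetical stand-ins following the generated headers'
 * <REG>__<FIELD>_MASK / <REG>__<FIELD>__SHIFT convention, not driver code.
 */
#include <stdint.h>

#define DEMO_REG__CMD_MASK   0x00000003U
#define DEMO_REG__CMD__SHIFT 0

/* roughly what REG_GET_FIELD(val, DEMO_REG, CMD) computes */
static inline uint32_t demo_get_field(uint32_t val)
{
	return (val & DEMO_REG__CMD_MASK) >> DEMO_REG__CMD__SHIFT;
}

/* roughly what REG_SET_FIELD(val, DEMO_REG, CMD, x) computes */
static inline uint32_t demo_set_field(uint32_t val, uint32_t x)
{
	return (val & ~DEMO_REG__CMD_MASK) |
	       ((x << DEMO_REG__CMD__SHIFT) & DEMO_REG__CMD_MASK);
}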
4721
4722 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4723 {
4724         uint32_t data;
4725
4726         data = RLC_SAFE_MODE__CMD_MASK;
4727         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4728 }
4729
4730 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4731                                                 bool enable)
4732 {
4733         amdgpu_gfx_rlc_enter_safe_mode(adev);
4734
4735         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4736                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4737                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4738                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4739         } else {
4740                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4741                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4742                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4743         }
4744
4745         amdgpu_gfx_rlc_exit_safe_mode(adev);
4746 }
4747
4748 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4749                                                 bool enable)
4750 {
4751         /* TODO: double check if we need to perform under safe mode */
4752         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4753
4754         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4755                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4756         else
4757                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4758
4759         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4760                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4761         else
4762                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4763
4764         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4765 }
4766
4767 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4768                                                       bool enable)
4769 {
4770         uint32_t data, def;
4771
4772         amdgpu_gfx_rlc_enter_safe_mode(adev);
4773
4774         /* It is disabled by HW by default */
4775         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4776                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4777                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4778
4779                 if (adev->asic_type != CHIP_VEGA12)
4780                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4781
4782                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4783                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4784                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4785
4786                 /* only for Vega10 & Raven1 */
4787                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4788
4789                 if (def != data)
4790                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4791
4792                 /* MGLS is a global flag to control all MGLS in GFX */
4793                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4794                         /* 2 - RLC memory Light sleep */
4795                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4796                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4797                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4798                                 if (def != data)
4799                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4800                         }
4801                         /* 3 - CP memory Light sleep */
4802                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4803                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4804                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4805                                 if (def != data)
4806                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4807                         }
4808                 }
4809         } else {
4810                 /* 1 - MGCG_OVERRIDE */
4811                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4812
4813                 if (adev->asic_type != CHIP_VEGA12)
4814                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4815
4816                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4817                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4818                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4819                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4820
4821                 if (def != data)
4822                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4823
4824                 /* 2 - disable MGLS in RLC */
4825                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4826                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4827                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4828                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4829                 }
4830
4831                 /* 3 - disable MGLS in CP */
4832                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4833                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4834                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4835                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4836                 }
4837         }
4838
4839         amdgpu_gfx_rlc_exit_safe_mode(adev);
4840 }
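/*
 * Editor's note: the "def = data = RREG32(...); ...; if (def != data)
 * WREG32(...);" pattern used throughout this function is the driver's
 * standard read-modify-write idiom; it skips the register write when
 * nothing actually changed. A condensed, compilable sketch with a plain
 * variable standing in for the MMIO register (all demo_* names invented):
 */
#include <stdint.h>

static uint32_t demo_reg;                              /* fake register */
static uint32_t demo_rreg(void) { return demo_reg; }   /* ~RREG32_SOC15 */
static void demo_wreg(uint32_t v) { demo_reg = v; }    /* ~WREG32_SOC15 */

static void demo_update_bits(uint32_t set_mask, uint32_t clr_mask)
{
	uint32_t def, data;

	def = data = demo_rreg();  /* snapshot the current value */
	data &= ~clr_mask;         /* clear the bits being disabled */
	data |= set_mask;          /* set the bits being enabled */
	if (def != data)           /* write back only on a real change */
		demo_wreg(data);
}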
4841
4842 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4843                                            bool enable)
4844 {
4845         uint32_t data, def;
4846
4847         if (adev->asic_type == CHIP_ARCTURUS)
4848                 return;
4849
4850         amdgpu_gfx_rlc_enter_safe_mode(adev);
4851
4852         /* Enable 3D CGCG/CGLS */
4853         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4854                 /* write cmd to clear cgcg/cgls ov */
4855                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4856                 /* unset CGCG override */
4857                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4858                 /* update CGCG and CGLS override bits */
4859                 if (def != data)
4860                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4861
4862                 /* enable 3Dcgcg FSM(0x0000363f) */
4863                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4864
4865                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4866                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4867                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4868                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4869                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4870                 if (def != data)
4871                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4872
4873                 /* set IDLE_POLL_COUNT(0x00900100) */
4874                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4875                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4876                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4877                 if (def != data)
4878                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4879         } else {
4880                 /* Disable CGCG/CGLS */
4881                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4882                 /* disable cgcg, cgls should be disabled */
4883                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4884                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4885                 /* disable cgcg and cgls in FSM */
4886                 if (def != data)
4887                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4888         }
4889
4890         amdgpu_gfx_rlc_exit_safe_mode(adev);
4891 }
4892
4893 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4894                                                       bool enable)
4895 {
4896         uint32_t def, data;
4897
4898         amdgpu_gfx_rlc_enter_safe_mode(adev);
4899
4900         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4901                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4902                 /* unset CGCG override */
4903                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4904                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4905                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4906                 else
4907                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4908                 /* update CGCG and CGLS override bits */
4909                 if (def != data)
4910                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4911
4912                 /* enable cgcg FSM(0x0000363F) */
4913                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4914
4915                 if (adev->asic_type == CHIP_ARCTURUS)
4916                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4917                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4918                 else
4919                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4920                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4921                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4922                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4923                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4924                 if (def != data)
4925                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4926
4927                 /* set IDLE_POLL_COUNT(0x00900100) */
4928                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4929                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4930                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4931                 if (def != data)
4932                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4933         } else {
4934                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4935                 /* reset CGCG/CGLS bits */
4936                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4937                 /* disable cgcg and cgls in FSM */
4938                 if (def != data)
4939                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4940         }
4941
4942         amdgpu_gfx_rlc_exit_safe_mode(adev);
4943 }
4944
4945 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4946                                             bool enable)
4947 {
4948         if (enable) {
4949                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4950                  * ===  MGCG + MGLS ===
4951                  */
4952                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4953                 /* ===  CGCG /CGLS for GFX 3D Only === */
4954                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4955                 /* ===  CGCG + CGLS === */
4956                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4957         } else {
4958                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4959                  * ===  CGCG + CGLS ===
4960                  */
4961                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4962                 /* ===  CGCG /CGLS for GFX 3D Only === */
4963                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4964                 /* ===  MGCG + MGLS === */
4965                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4966         }
4967         return 0;
4968 }
4969
4970 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4971 {
4972         u32 reg, data;
4973
4974         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4975         if (amdgpu_sriov_is_pp_one_vf(adev))
4976                 data = RREG32_NO_KIQ(reg);
4977         else
4978                 data = RREG32(reg);
4979
4980         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4981         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4982
4983         if (amdgpu_sriov_is_pp_one_vf(adev))
4984                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4985         else
4986                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4987 }
4988
4989 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4990                                         uint32_t offset,
4991                                         struct soc15_reg_rlcg *entries, int arr_size)
4992 {
4993         int i;
4994         uint32_t reg;
4995
4996         if (!entries)
4997                 return false;
4998
4999         for (i = 0; i < arr_size; i++) {
5000                 const struct soc15_reg_rlcg *entry;
5001
5002                 entry = &entries[i];
5003                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5004                 if (offset == reg)
5005                         return true;
5006         }
5007
5008         return false;
5009 }
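/*
 * Editor's note: the range check above resolves each table entry to an
 * absolute offset via the per-IP map (reg_offset[hwip][instance][segment]
 * + entry->reg) and then does a linear scan. A condensed sketch of the
 * same lookup with a hypothetical one-dimensional base table (the demo_*
 * names are invented, not driver definitions):
 */
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

struct demo_entry { uint32_t base_idx; uint32_t reg; };

static const uint32_t demo_bases[] = { 0x0000, 0x2000 };  /* fake segments */

static bool demo_in_range(uint32_t offset,
			  const struct demo_entry *tbl, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (offset == demo_bases[tbl[i].base_idx] + tbl[i].reg)
			return true;
	return false;
}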
5010
5011 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5012 {
5013         return gfx_v9_0_check_rlcg_range(adev, offset,
5014                                         (void *)rlcg_access_gc_9_0,
5015                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5016 }
5017
5018 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5019         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5020         .set_safe_mode = gfx_v9_0_set_safe_mode,
5021         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5022         .init = gfx_v9_0_rlc_init,
5023         .get_csb_size = gfx_v9_0_get_csb_size,
5024         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5025         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5026         .resume = gfx_v9_0_rlc_resume,
5027         .stop = gfx_v9_0_rlc_stop,
5028         .reset = gfx_v9_0_rlc_reset,
5029         .start = gfx_v9_0_rlc_start,
5030         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5031         .rlcg_wreg = gfx_v9_0_rlcg_wreg,
5032         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5033 };
5034
5035 static int gfx_v9_0_set_powergating_state(void *handle,
5036                                           enum amd_powergating_state state)
5037 {
5038         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5039         bool enable = (state == AMD_PG_STATE_GATE);
5040
5041         switch (adev->asic_type) {
5042         case CHIP_RAVEN:
5043         case CHIP_RENOIR:
5044                 if (!enable)
5045                         amdgpu_gfx_off_ctrl(adev, false);
5046
5047                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5048                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5049                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5050                 } else {
5051                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5052                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5053                 }
5054
5055                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5056                         gfx_v9_0_enable_cp_power_gating(adev, true);
5057                 else
5058                         gfx_v9_0_enable_cp_power_gating(adev, false);
5059
5060                 /* update gfx cgpg state */
5061                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5062
5063                 /* update mgcg state */
5064                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5065
5066                 if (enable)
5067                         amdgpu_gfx_off_ctrl(adev, true);
5068                 break;
5069         case CHIP_VEGA12:
5070                 amdgpu_gfx_off_ctrl(adev, enable);
5071                 break;
5072         default:
5073                 break;
5074         }
5075
5076         return 0;
5077 }
5078
5079 static int gfx_v9_0_set_clockgating_state(void *handle,
5080                                           enum amd_clockgating_state state)
5081 {
5082         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5083
5084         if (amdgpu_sriov_vf(adev))
5085                 return 0;
5086
5087         switch (adev->asic_type) {
5088         case CHIP_VEGA10:
5089         case CHIP_VEGA12:
5090         case CHIP_VEGA20:
5091         case CHIP_RAVEN:
5092         case CHIP_ARCTURUS:
5093         case CHIP_RENOIR:
5094                 gfx_v9_0_update_gfx_clock_gating(adev,
5095                                                  state == AMD_CG_STATE_GATE);
5096                 break;
5097         default:
5098                 break;
5099         }
5100         return 0;
5101 }
5102
5103 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5104 {
5105         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5106         int data;
5107
5108         if (amdgpu_sriov_vf(adev))
5109                 *flags = 0;
5110
5111         /* AMD_CG_SUPPORT_GFX_MGCG */
5112         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5113         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5114                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5115
5116         /* AMD_CG_SUPPORT_GFX_CGCG */
5117         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5118         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5119                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5120
5121         /* AMD_CG_SUPPORT_GFX_CGLS */
5122         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5123                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5124
5125         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5126         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5127         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5128                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5129
5130         /* AMD_CG_SUPPORT_GFX_CP_LS */
5131         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5132         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5133                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5134
5135         if (adev->asic_type != CHIP_ARCTURUS) {
5136                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5137                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5138                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5139                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5140
5141                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5142                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5143                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5144         }
5145 }
5146
5147 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5148 {
5149         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5150 }
5151
5152 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5153 {
5154         struct amdgpu_device *adev = ring->adev;
5155         u64 wptr;
5156
5157         /* XXX check if swapping is necessary on BE */
5158         if (ring->use_doorbell) {
5159                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5160         } else {
5161                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5162                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5163         }
5164
5165         return wptr;
5166 }
5167
5168 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5169 {
5170         struct amdgpu_device *adev = ring->adev;
5171
5172         if (ring->use_doorbell) {
5173                 /* XXX check if swapping is necessary on BE */
5174                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5175                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5176         } else {
5177                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5178                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5179         }
5180 }
5181
5182 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5183 {
5184         struct amdgpu_device *adev = ring->adev;
5185         u32 ref_and_mask, reg_mem_engine;
5186         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5187
5188         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5189                 switch (ring->me) {
5190                 case 1:
5191                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5192                         break;
5193                 case 2:
5194                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5195                         break;
5196                 default:
5197                         return;
5198                 }
5199                 reg_mem_engine = 0;
5200         } else {
5201                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5202                 reg_mem_engine = 1; /* pfp */
5203         }
5204
5205         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5206                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5207                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5208                               ref_and_mask, ref_and_mask, 0x20);
5209 }
5210
5211 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5212                                         struct amdgpu_job *job,
5213                                         struct amdgpu_ib *ib,
5214                                         uint32_t flags)
5215 {
5216         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5217         u32 header, control = 0;
5218
5219         if (ib->flags & AMDGPU_IB_FLAG_CE)
5220                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5221         else
5222                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5223
5224         control |= ib->length_dw | (vmid << 24);
5225
5226         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5227                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5228
5229                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5230                         gfx_v9_0_ring_emit_de_meta(ring);
5231         }
5232
5233         amdgpu_ring_write(ring, header);
5234         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5235         amdgpu_ring_write(ring,
5236 #ifdef __BIG_ENDIAN
5237                 (2 << 0) |
5238 #endif
5239                 lower_32_bits(ib->gpu_addr));
5240         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5241         amdgpu_ring_write(ring, control);
5242 }
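/*
 * Editor's note: to make the INDIRECT_BUFFER encoding above concrete, the
 * control dword packs the IB size (in dwords) in the low bits and the VMID
 * in bits 24..27, exactly as "control |= ib->length_dw | (vmid << 24)"
 * does. A worked example with invented sample values:
 */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint32_t length_dw = 16;  /* hypothetical IB size in dwords */
	uint32_t vmid = 3;        /* hypothetical VMID */
	uint32_t control = length_dw | (vmid << 24);

	assert(control == 0x03000010);  /* VMID 3 in bits 24..27, size 16 */
	return 0;
}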
5243
5244 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5245                                           struct amdgpu_job *job,
5246                                           struct amdgpu_ib *ib,
5247                                           uint32_t flags)
5248 {
5249         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5250         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5251
5252         /* Currently, there is a high possibility to get wave ID mismatch
5253          * between ME and GDS, leading to a hw deadlock, because ME generates
5254          * different wave IDs than the GDS expects. This situation happens
5255          * randomly when at least 5 compute pipes use GDS ordered append.
5256          * The wave IDs generated by ME are also wrong after suspend/resume.
5257          * Those are probably bugs somewhere else in the kernel driver.
5258          *
5259          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5260          * GDS to 0 for this ring (me/pipe).
5261          */
5262         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5263                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5264                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5265                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5266         }
5267
5268         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5269         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5270         amdgpu_ring_write(ring,
5271 #ifdef __BIG_ENDIAN
5272                                 (2 << 0) |
5273 #endif
5274                                 lower_32_bits(ib->gpu_addr));
5275         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5276         amdgpu_ring_write(ring, control);
5277 }
5278
5279 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5280                                      u64 seq, unsigned flags)
5281 {
5282         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5283         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5284         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5285
5286         /* RELEASE_MEM - flush caches, send int */
5287         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5288         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5289                                                EOP_TC_NC_ACTION_EN) :
5290                                               (EOP_TCL1_ACTION_EN |
5291                                                EOP_TC_ACTION_EN |
5292                                                EOP_TC_WB_ACTION_EN |
5293                                                EOP_TC_MD_ACTION_EN)) |
5294                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5295                                  EVENT_INDEX(5)));
5296         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5297
5298         /*
5299          * The address must be Qword aligned for a 64bit write, or Dword
5300          * aligned if only the low 32 bits are sent (data high discarded).
5301          */
5302         if (write64bit)
5303                 BUG_ON(addr & 0x7);
5304         else
5305                 BUG_ON(addr & 0x3);
5306         amdgpu_ring_write(ring, lower_32_bits(addr));
5307         amdgpu_ring_write(ring, upper_32_bits(addr));
5308         amdgpu_ring_write(ring, lower_32_bits(seq));
5309         amdgpu_ring_write(ring, upper_32_bits(seq));
5310         amdgpu_ring_write(ring, 0);
5311 }
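/*
 * Editor's note: the alignment rule enforced by the BUG_ON()s above can be
 * phrased as a predicate; a small sketch (demo_* is an invented helper):
 */
#include <stdint.h>
#include <stdbool.h>

/* RELEASE_MEM address rule: 8-byte aligned for a 64-bit fence write,
 * 4-byte aligned when only the low 32 bits are written. */
static bool demo_fence_addr_ok(uint64_t addr, bool write64bit)
{
	return write64bit ? !(addr & 0x7) : !(addr & 0x3);
}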
5312
5313 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5314 {
5315         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5316         uint32_t seq = ring->fence_drv.sync_seq;
5317         uint64_t addr = ring->fence_drv.gpu_addr;
5318
5319         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5320                               lower_32_bits(addr), upper_32_bits(addr),
5321                               seq, 0xffffffff, 4);
5322 }
5323
5324 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5325                                         unsigned vmid, uint64_t pd_addr)
5326 {
5327         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5328
5329         /* compute doesn't have PFP */
5330         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5331                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5332                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5333                 amdgpu_ring_write(ring, 0x0);
5334         }
5335 }
5336
5337 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5338 {
5339         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5340 }
5341
5342 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5343 {
5344         u64 wptr;
5345
5346         /* XXX check if swapping is necessary on BE */
5347         if (ring->use_doorbell)
5348                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5349         else
5350                 BUG();
5351         return wptr;
5352 }
5353
5354 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5355 {
5356         struct amdgpu_device *adev = ring->adev;
5357
5358         /* XXX check if swapping is necessary on BE */
5359         if (ring->use_doorbell) {
5360                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5361                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5362         } else {
5363                 BUG(); /* only DOORBELL method supported on gfx9 now */
5364         }
5365 }
5366
5367 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5368                                          u64 seq, unsigned int flags)
5369 {
5370         struct amdgpu_device *adev = ring->adev;
5371
5372         /* we only allocate 32bit for each seq wb address */
5373         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5374
5375         /* write fence seq to the "addr" */
5376         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5377         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5378                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5379         amdgpu_ring_write(ring, lower_32_bits(addr));
5380         amdgpu_ring_write(ring, upper_32_bits(addr));
5381         amdgpu_ring_write(ring, lower_32_bits(seq));
5382
5383         if (flags & AMDGPU_FENCE_FLAG_INT) {
5384                 /* set register to trigger INT */
5385                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5386                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5387                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5388                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5389                 amdgpu_ring_write(ring, 0);
5390                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5391         }
5392 }
5393
5394 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5395 {
5396         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5397         amdgpu_ring_write(ring, 0);
5398 }
5399
5400 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5401 {
5402         struct v9_ce_ib_state ce_payload = {0};
5403         uint64_t csa_addr;
5404         int cnt;
5405
5406         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5407         csa_addr = amdgpu_csa_vaddr(ring->adev);
5408
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5410         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5411                                  WRITE_DATA_DST_SEL(8) |
5412                                  WR_CONFIRM) |
5413                                  WRITE_DATA_CACHE_POLICY(0));
5414         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5415         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5416         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5417 }
5418
5419 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5420 {
5421         struct v9_de_ib_state de_payload = {0};
5422         uint64_t csa_addr, gds_addr;
5423         int cnt;
5424
5425         csa_addr = amdgpu_csa_vaddr(ring->adev);
5426         gds_addr = csa_addr + 4096;
5427         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5428         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5429
5430         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5431         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5432         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5433                                  WRITE_DATA_DST_SEL(8) |
5434                                  WR_CONFIRM) |
5435                                  WRITE_DATA_CACHE_POLICY(0));
5436         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5437         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5438         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5439 }
5440
5441 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5442                                    bool secure)
5443 {
5444         uint32_t v = secure ? FRAME_TMZ : 0;
5445
5446         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5447         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5448 }
5449
5450 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5451 {
5452         uint32_t dw2 = 0;
5453
5454         if (amdgpu_sriov_vf(ring->adev))
5455                 gfx_v9_0_ring_emit_ce_meta(ring);
5456
5457         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5458         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5459                 /* set load_global_config & load_global_uconfig */
5460                 dw2 |= 0x8001;
5461                 /* set load_cs_sh_regs */
5462                 dw2 |= 0x01000000;
5463                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5464                 dw2 |= 0x10002;
5465
5466                 /* set load_ce_ram if preamble presented */
5467                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5468                         dw2 |= 0x10000000;
5469         } else {
5470                 /* still load_ce_ram if this is the first time a preamble is
5471                  * presented, even though no context switch happens.
5472                  */
5473                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5474                         dw2 |= 0x10000000;
5475         }
5476
5477         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5478         amdgpu_ring_write(ring, dw2);
5479         amdgpu_ring_write(ring, 0);
5480 }
5481
5482 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5483 {
5484         unsigned ret;
5485         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5486         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5487         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5488         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr==0 */
5489         ret = ring->wptr & ring->buf_mask;
5490         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5491         return ret;
5492 }
5493
5494 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5495 {
5496         unsigned cur;
5497         BUG_ON(offset > ring->buf_mask);
5498         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5499
5500         cur = (ring->wptr & ring->buf_mask) - 1;
5501         if (likely(cur > offset))
5502                 ring->ring[offset] = cur - offset;
5503         else
5504                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5505 }
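/*
 * Editor's note: the patching arithmetic above stores, at @offset, the
 * number of dwords between the COND_EXEC patch slot and the current write
 * pointer, accounting for ring wraparound. A worked example assuming a
 * hypothetical 256-dword ring (buf_mask = 255):
 */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint32_t ring_dwords = 256, buf_mask = 255;
	uint32_t offset, cur, count;

	/* no wrap: patch slot at 10, wptr at 20 -> 9 dwords follow it */
	offset = 10; cur = (20 & buf_mask) - 1;
	count = (cur > offset) ? cur - offset : ring_dwords - offset + cur;
	assert(count == 9);

	/* wrapped: patch slot at 250, wptr wrapped to 5 -> 10 dwords follow */
	offset = 250; cur = (5 & buf_mask) - 1;
	count = (cur > offset) ? cur - offset : ring_dwords - offset + cur;
	assert(count == 10);
	return 0;
}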
5506
5507 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5508                                     uint32_t reg_val_offs)
5509 {
5510         struct amdgpu_device *adev = ring->adev;
5511
5512         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5513         amdgpu_ring_write(ring, 0 |     /* src: register*/
5514                                 (5 << 8) |      /* dst: memory */
5515                                 (1 << 20));     /* write confirm */
5516         amdgpu_ring_write(ring, reg);
5517         amdgpu_ring_write(ring, 0);
5518         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5519                                 reg_val_offs * 4));
5520         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5521                                 reg_val_offs * 4));
5522 }
5523
5524 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5525                                     uint32_t val)
5526 {
5527         uint32_t cmd = 0;
5528
5529         switch (ring->funcs->type) {
5530         case AMDGPU_RING_TYPE_GFX:
5531                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5532                 break;
5533         case AMDGPU_RING_TYPE_KIQ:
5534                 cmd = (1 << 16); /* no inc addr */
5535                 break;
5536         default:
5537                 cmd = WR_CONFIRM;
5538                 break;
5539         }
5540         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5541         amdgpu_ring_write(ring, cmd);
5542         amdgpu_ring_write(ring, reg);
5543         amdgpu_ring_write(ring, 0);
5544         amdgpu_ring_write(ring, val);
5545 }
5546
5547 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5548                                         uint32_t val, uint32_t mask)
5549 {
5550         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5551 }
5552
5553 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5554                                                   uint32_t reg0, uint32_t reg1,
5555                                                   uint32_t ref, uint32_t mask)
5556 {
5557         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5558         struct amdgpu_device *adev = ring->adev;
5559         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5560                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5561
5562         if (fw_version_ok)
5563                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5564                                       ref, mask, 0x20);
5565         else
5566                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5567                                                            ref, mask);
5568 }
5569
5570 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5571 {
5572         struct amdgpu_device *adev = ring->adev;
5573         uint32_t value = 0;
5574
5575         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5576         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5577         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5578         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5579         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5580 }
5581
5582 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5583                                                  enum amdgpu_interrupt_state state)
5584 {
5585         switch (state) {
5586         case AMDGPU_IRQ_STATE_DISABLE:
5587         case AMDGPU_IRQ_STATE_ENABLE:
5588                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5589                                TIME_STAMP_INT_ENABLE,
5590                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5591                 break;
5592         default:
5593                 break;
5594         }
5595 }
5596
5597 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5598                                                      int me, int pipe,
5599                                                      enum amdgpu_interrupt_state state)
5600 {
5601         u32 mec_int_cntl, mec_int_cntl_reg;
5602
5603         /*
5604          * amdgpu controls only the first MEC. That's why this function only
5605          * handles the setting of interrupts for this specific MEC. All other
5606          * pipes' interrupts are set by amdkfd.
5607          */
5608
5609         if (me == 1) {
5610                 switch (pipe) {
5611                 case 0:
5612                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5613                         break;
5614                 case 1:
5615                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5616                         break;
5617                 case 2:
5618                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5619                         break;
5620                 case 3:
5621                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5622                         break;
5623                 default:
5624                         DRM_DEBUG("invalid pipe %d\n", pipe);
5625                         return;
5626                 }
5627         } else {
5628                 DRM_DEBUG("invalid me %d\n", me);
5629                 return;
5630         }
5631
5632         switch (state) {
5633         case AMDGPU_IRQ_STATE_DISABLE:
5634                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5635                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5636                                              TIME_STAMP_INT_ENABLE, 0);
5637                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5638                 break;
5639         case AMDGPU_IRQ_STATE_ENABLE:
5640                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5641                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5642                                              TIME_STAMP_INT_ENABLE, 1);
5643                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5644                 break;
5645         default:
5646                 break;
5647         }
5648 }
5649
5650 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5651                                              struct amdgpu_irq_src *source,
5652                                              unsigned type,
5653                                              enum amdgpu_interrupt_state state)
5654 {
5655         switch (state) {
5656         case AMDGPU_IRQ_STATE_DISABLE:
5657         case AMDGPU_IRQ_STATE_ENABLE:
5658                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5659                                PRIV_REG_INT_ENABLE,
5660                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5661                 break;
5662         default:
5663                 break;
5664         }
5665
5666         return 0;
5667 }
5668
5669 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5670                                               struct amdgpu_irq_src *source,
5671                                               unsigned type,
5672                                               enum amdgpu_interrupt_state state)
5673 {
5674         switch (state) {
5675         case AMDGPU_IRQ_STATE_DISABLE:
5676         case AMDGPU_IRQ_STATE_ENABLE:
5677                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5678                                PRIV_INSTR_INT_ENABLE,
5679                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5680         default:
5681                 break;
5682         }
5683
5684         return 0;
5685 }
5686
5687 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5688         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5689                         CP_ECC_ERROR_INT_ENABLE, 1)
5690
5691 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5692         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5693                         CP_ECC_ERROR_INT_ENABLE, 0)
5694
5695 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5696                                               struct amdgpu_irq_src *source,
5697                                               unsigned type,
5698                                               enum amdgpu_interrupt_state state)
5699 {
5700         switch (state) {
5701         case AMDGPU_IRQ_STATE_DISABLE:
5702                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5703                                 CP_ECC_ERROR_INT_ENABLE, 0);
5704                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5705                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5706                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5707                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5708                 break;
5709
5710         case AMDGPU_IRQ_STATE_ENABLE:
5711                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5712                                 CP_ECC_ERROR_INT_ENABLE, 1);
5713                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5714                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5715                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5716                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5717                 break;
5718         default:
5719                 break;
5720         }
5721
5722         return 0;
5723 }
5724
5725
5726 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5727                                             struct amdgpu_irq_src *src,
5728                                             unsigned type,
5729                                             enum amdgpu_interrupt_state state)
5730 {
5731         switch (type) {
5732         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5733                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5734                 break;
5735         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5736                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5737                 break;
5738         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5739                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5740                 break;
5741         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5742                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5743                 break;
5744         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5745                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5746                 break;
5747         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5748                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5749                 break;
5750         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5751                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5752                 break;
5753         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5754                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5755                 break;
5756         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5757                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5758                 break;
5759         default:
5760                 break;
5761         }
5762         return 0;
5763 }
5764
5765 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5766                             struct amdgpu_irq_src *source,
5767                             struct amdgpu_iv_entry *entry)
5768 {
5769         int i;
5770         u8 me_id, pipe_id, queue_id;
5771         struct amdgpu_ring *ring;
5772
5773         DRM_DEBUG("IH: CP EOP\n");
5774         me_id = (entry->ring_id & 0x0c) >> 2;
5775         pipe_id = (entry->ring_id & 0x03) >> 0;
5776         queue_id = (entry->ring_id & 0x70) >> 4;
5777
5778         switch (me_id) {
5779         case 0:
5780                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5781                 break;
5782         case 1:
5783         case 2:
5784                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5785                         ring = &adev->gfx.compute_ring[i];
5786                         /* Per-queue interrupt is supported for MEC starting from VI.
5787                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5788                          */
5789                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5790                                 amdgpu_fence_process(ring);
5791                 }
5792                 break;
5793         }
5794         return 0;
5795 }
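/*
 * Editor's note: a worked example of the ring_id decode at the top of the
 * handler above, with an invented sample value:
 */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint8_t ring_id  = 0x36;                   /* hypothetical IV ring_id */
	uint8_t me_id    = (ring_id & 0x0c) >> 2;  /* bits 3:2 */
	uint8_t pipe_id  = (ring_id & 0x03) >> 0;  /* bits 1:0 */
	uint8_t queue_id = (ring_id & 0x70) >> 4;  /* bits 6:4 */

	assert(me_id == 1 && pipe_id == 2 && queue_id == 3);
	return 0;
}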
5796
5797 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5798                            struct amdgpu_iv_entry *entry)
5799 {
5800         u8 me_id, pipe_id, queue_id;
5801         struct amdgpu_ring *ring;
5802         int i;
5803
5804         me_id = (entry->ring_id & 0x0c) >> 2;
5805         pipe_id = (entry->ring_id & 0x03) >> 0;
5806         queue_id = (entry->ring_id & 0x70) >> 4;
5807
5808         switch (me_id) {
5809         case 0:
5810                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5811                 break;
5812         case 1:
5813         case 2:
5814                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5815                         ring = &adev->gfx.compute_ring[i];
5816                         if (ring->me == me_id && ring->pipe == pipe_id &&
5817                             ring->queue == queue_id)
5818                                 drm_sched_fault(&ring->sched);
5819                 }
5820                 break;
5821         }
5822 }
5823
5824 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5825                                  struct amdgpu_irq_src *source,
5826                                  struct amdgpu_iv_entry *entry)
5827 {
5828         DRM_ERROR("Illegal register access in command stream\n");
5829         gfx_v9_0_fault(adev, entry);
5830         return 0;
5831 }
5832
5833 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5834                                   struct amdgpu_irq_src *source,
5835                                   struct amdgpu_iv_entry *entry)
5836 {
5837         DRM_ERROR("Illegal instruction in command stream\n");
5838         gfx_v9_0_fault(adev, entry);
5839         return 0;
5840 }
5841
5842
5843 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5844         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5845           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5846           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5847         },
5848         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5849           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5850           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5851         },
5852         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5853           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5854           0, 0
5855         },
5856         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5857           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5858           0, 0
5859         },
5860         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5861           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5862           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5863         },
5864         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5865           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5866           0, 0
5867         },
5868         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5869           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5870           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5871         },
5872         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5873           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5874           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5875         },
5876         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5877           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5878           0, 0
5879         },
5880         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5881           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5882           0, 0
5883         },
5884         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5885           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5886           0, 0
5887         },
5888         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5889           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5890           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5891         },
5892         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5893           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5894           0, 0
5895         },
5896         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5897           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5898           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5899         },
5900         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5901           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5902           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5903           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5904         },
5905         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5906           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5907           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5908           0, 0
5909         },
5910         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5911           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5912           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5913           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5914         },
5915         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5916           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5917           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5918           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5919         },
5920         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5921           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5922           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5923           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5924         },
5925         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5926           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5927           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5928           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5929         },
5930         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5931           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5932           0, 0
5933         },
5934         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5935           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5936           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5937         },
5938         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5939           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5940           0, 0
5941         },
5942         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5943           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5944           0, 0
5945         },
5946         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5947           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5948           0, 0
5949         },
5950         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5951           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5952           0, 0
5953         },
5954         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5955           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5956           0, 0
5957         },
5958         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5959           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5960           0, 0
5961         },
5962         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5963           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5964           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5965         },
5966         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5967           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5968           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5969         },
5970         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5971           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5972           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5973         },
5974         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5975           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5976           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5977         },
5978         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5979           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5980           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5981         },
5982         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5983           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5984           0, 0
5985         },
5986         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5987           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5988           0, 0
5989         },
5990         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5991           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5992           0, 0
5993         },
5994         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5995           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5996           0, 0
5997         },
5998         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5999           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6000           0, 0
6001         },
6002         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6003           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6004           0, 0
6005         },
6006         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6007           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6008           0, 0
6009         },
6010         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6011           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6012           0, 0
6013         },
6014         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6015           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6016           0, 0
6017         },
6018         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6019           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6020           0, 0
6021         },
6022         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6023           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6024           0, 0
6025         },
6026         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6027           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6028           0, 0
6029         },
6030         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6031           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6032           0, 0
6033         },
6034         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6035           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6036           0, 0
6037         },
6038         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6039           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6040           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6041         },
6042         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6043           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6044           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6045         },
6046         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6047           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6048           0, 0
6049         },
6050         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6051           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6052           0, 0
6053         },
6054         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6055           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6056           0, 0
6057         },
6058         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6059           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6060           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6061         },
6062         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6063           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6064           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6065         },
6066         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6067           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6068           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6069         },
6070         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6071           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6072           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6073         },
6074         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6075           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6076           0, 0
6077         },
6078         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6079           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6080           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6081         },
6082         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6083           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6084           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6085         },
6086         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6087           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6088           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6089         },
6090         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6091           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6092           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6093         },
6094         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6095           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6096           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6097         },
6098         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6099           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6100           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6101         },
6102         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6103           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6104           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6105         },
6106         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6107           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6108           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6109         },
6110         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6111           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6112           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6113         },
6114         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6115           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6116           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6117         },
6118         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6119           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6120           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6121         },
6122         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6123           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6124           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6125         },
6126         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6127           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6128           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6129         },
6130         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6131           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6132           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6133         },
6134         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6135           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6136           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6137         },
6138         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6139           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6140           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6141         },
6142         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6143           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6144           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6145         },
6146         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6147           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6148           0, 0
6149         },
6150         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6151           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6152           0, 0
6153         },
6154         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6155           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6156           0, 0
6157         },
6158         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6159           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6160           0, 0
6161         },
6162         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6163           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6164           0, 0
6165         },
6166         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6167           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6168           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6169         },
6170         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6171           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6172           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6173         },
6174         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6175           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6176           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6177         },
6178         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6179           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6180           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6181         },
6182         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6183           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6184           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6185         },
6186         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6187           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6188           0, 0
6189         },
6190         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6191           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6192           0, 0
6193         },
6194         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6195           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6196           0, 0
6197         },
6198         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6199           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6200           0, 0
6201         },
6202         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6203           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6204           0, 0
6205         },
6206         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6207           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6208           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6209         },
6210         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6211           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6212           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6213         },
6214         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6215           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6216           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6217         },
6218         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6219           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6220           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6221         },
6222         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6223           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6224           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6225         },
6226         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6227           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6228           0, 0
6229         },
6230         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6231           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6232           0, 0
6233         },
6234         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6235           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6236           0, 0
6237         },
6238         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6239           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6240           0, 0
6241         },
6242         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6243           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6244           0, 0
6245         },
6246         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6247           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6248           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6249         },
6250         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6251           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6252           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6253         },
6254         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6255           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6256           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6257         },
6258         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6259           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6260           0, 0
6261         },
6262         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6263           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6264           0, 0
6265         },
6266         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6267           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6268           0, 0
6269         },
6270         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6271           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6272           0, 0
6273         },
6274         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6275           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6276           0, 0
6277         },
6278         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6279           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6280           0, 0
6281         }
6282 };
6283
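/*
 * Inject a RAS error into a GFX sub-block. The request is validated
 * against the ras_gfx_subblocks table (known name, HW- and SW-supported
 * error type) and then forwarded to the PSP RAS TA, serialized by
 * grbm_idx_mutex.
 */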
6284 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6285                                      void *inject_if)
6286 {
6287         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6288         int ret;
6289         struct ta_ras_trigger_error_input block_info = { 0 };
6290
6291         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6292                 return -EINVAL;
6293
6294         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6295                 return -EINVAL;
6296
6297         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6298                 return -EPERM;
6299
6300         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6301               info->head.type)) {
6302                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6303                         ras_gfx_subblocks[info->head.sub_block_index].name,
6304                         info->head.type);
6305                 return -EPERM;
6306         }
6307
6308         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6309               info->head.type)) {
6310                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6311                         ras_gfx_subblocks[info->head.sub_block_index].name,
6312                         info->head.type);
6313                 return -EPERM;
6314         }
6315
6316         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6317         block_info.sub_block_index =
6318                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6319         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6320         block_info.address = info->address;
6321         block_info.value = info->value;
6322
6323         mutex_lock(&adev->grbm_idx_mutex);
6324         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6325         mutex_unlock(&adev->grbm_idx_mutex);
6326
6327         return ret;
6328 }
6329
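/* Names of the UTC memory instances, indexed by the *_ECC_INDEX value. */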
6330 static const char *vml2_mems[] = {
6331         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6332         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6333         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6334         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6335         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6336         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6337         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6338         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6339         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6340         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6341         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6342         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6343         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6344         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6345         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6346         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6347 };
6348
6349 static const char *vml2_walker_mems[] = {
6350         "UTC_VML2_CACHE_PDE0_MEM0",
6351         "UTC_VML2_CACHE_PDE0_MEM1",
6352         "UTC_VML2_CACHE_PDE1_MEM0",
6353         "UTC_VML2_CACHE_PDE1_MEM1",
6354         "UTC_VML2_CACHE_PDE2_MEM0",
6355         "UTC_VML2_CACHE_PDE2_MEM1",
6356         "UTC_VML2_RDIF_LOG_FIFO",
6357 };
6358
6359 static const char *atc_l2_cache_2m_mems[] = {
6360         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6361         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6362         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6363         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6364 };
6365
6366 static const char *atc_l2_cache_4k_mems[] = {
6367         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6368         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6369         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6370         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6371         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6372         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6373         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6374         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6375         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6376         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6377         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6378         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6379         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6380         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6381         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6382         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6383         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6384         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6385         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6386         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6387         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6388         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6389         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6390         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6391         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6392         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6393         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6394         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6395         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6396         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6397         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6398         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6399 };
6400
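/*
 * Query SEC/DED error counts for the UTC memories (VML2, VML2 walker,
 * ATC L2 2M and 4K caches). Each instance is selected through the
 * corresponding *_INDEX register before its count register is read;
 * index 255 is written before and after the scan.
 */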
6401 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6402                                          struct ras_err_data *err_data)
6403 {
6404         uint32_t i, data;
6405         uint32_t sec_count, ded_count;
6406
6407         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6408         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6409         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6410         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6411         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6412         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6413         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6414         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6415
6416         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6417                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6418                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6419
6420                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6421                 if (sec_count) {
6422                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6423                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6424                         err_data->ce_count += sec_count;
6425                 }
6426
6427                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6428                 if (ded_count) {
6429                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6430                                 "DED %d\n", i, vml2_mems[i], ded_count);
6431                         err_data->ue_count += ded_count;
6432                 }
6433         }
6434
6435         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6436                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6437                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6438
6439                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6440                                                 SEC_COUNT);
6441                 if (sec_count) {
6442                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6443                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6444                         err_data->ce_count += sec_count;
6445                 }
6446
6447                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6448                                                 DED_COUNT);
6449                 if (ded_count) {
6450                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6451                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6452                         err_data->ue_count += ded_count;
6453                 }
6454         }
6455
6456         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6457                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6458                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6459
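                /* SEC count is taken from bits [14:13] of
                 * ATC_L2_CACHE_2M_EDC_CNT (field name assumed from the
                 * hardcoded mask/shift below). */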
6460                 sec_count = (data & 0x00006000L) >> 0xd;
6461                 if (sec_count) {
6462                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6463                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6464                                 sec_count);
6465                         err_data->ce_count += sec_count;
6466                 }
6467         }
6468
6469         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6470                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6471                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6472
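                /* Bits [14:13] are taken as the SEC count and bits [16:15]
                 * as the DED count of ATC_L2_CACHE_4K_EDC_CNT (field names
                 * assumed from the hardcoded masks/shifts below). */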
6473                 sec_count = (data & 0x00006000L) >> 0xd;
6474                 if (sec_count) {
6475                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6476                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6477                                 sec_count);
6478                         err_data->ce_count += sec_count;
6479                 }
6480
6481                 ded_count = (data & 0x00018000L) >> 0xf;
6482                 if (ded_count) {
6483                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6484                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6485                                 ded_count);
6486                         err_data->ue_count += ded_count;
6487                 }
6488         }
6489
6490         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6491         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6492         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6493         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6494
6495         return 0;
6496 }
6497
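/*
 * Decode one raw EDC counter value: match the register against the
 * gfx_v9_0_ras_fields table and accumulate every SEC/DED field it
 * carries into the caller's running totals.
 */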
6498 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6499         const struct soc15_reg_entry *reg,
6500         uint32_t se_id, uint32_t inst_id, uint32_t value,
6501         uint32_t *sec_count, uint32_t *ded_count)
6502 {
6503         uint32_t i;
6504         uint32_t sec_cnt, ded_cnt;
6505
6506         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6507                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6508                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6509                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6510                         continue;
6511
6512                 sec_cnt = (value &
6513                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6514                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6515                 if (sec_cnt) {
6516                         dev_info(adev->dev, "GFX SubBlock %s, "
6517                                 "Instance[%d][%d], SEC %d\n",
6518                                 gfx_v9_0_ras_fields[i].name,
6519                                 se_id, inst_id,
6520                                 sec_cnt);
6521                         *sec_count += sec_cnt;
6522                 }
6523
6524                 ded_cnt = (value &
6525                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6526                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6527                 if (ded_cnt) {
6528                         dev_info(adev->dev, "GFX SubBlock %s, "
6529                                 "Instance[%d][%d], DED %d\n",
6530                                 gfx_v9_0_ras_fields[i].name,
6531                                 se_id, inst_id,
6532                                 ded_cnt);
6533                         *ded_count += ded_cnt;
6534                 }
6535         }
6536
6537         return 0;
6538 }
6539
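/*
 * Reset all GFX EDC error counters by reading back each read-to-clear
 * counter register across every SE/instance combination, then re-arm
 * the UTC (VML2/ATC L2) counters.
 */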
6540 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6541 {
6542         int i, j, k;
6543
6544         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6545                 return;
6546
6547         /* read back registers to clear the counters */
6548         mutex_lock(&adev->grbm_idx_mutex);
6549         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6550                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6551                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6552                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6553                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6554                         }
6555                 }
6556         }
6557         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6558         mutex_unlock(&adev->grbm_idx_mutex);
6559
6560         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6561         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6562         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6563         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6564         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6565         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6566         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6567         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6568
6569         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6570                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6571                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6572         }
6573
6574         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6575                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6576                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6577         }
6578
6579         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6580                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6581                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6582         }
6583
6584         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6585                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6586                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6587         }
6588
6589         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6590         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6591         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6592         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6593 }
6594
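/*
 * Walk every EDC counter register across all SE/instance combinations
 * (selected via the GRBM index), decode the counts, then fold in the
 * UTC (VML2/ATC L2) status. The counters are read-to-clear, so this
 * also resets them.
 */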
6595 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6596                                           void *ras_error_status)
6597 {
6598         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6599         uint32_t sec_count = 0, ded_count = 0;
6600         uint32_t i, j, k;
6601         uint32_t reg_value;
6602
6603         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6604                 return -EINVAL;
6605
6606         err_data->ue_count = 0;
6607         err_data->ce_count = 0;
6608
6609         mutex_lock(&adev->grbm_idx_mutex);
6610
6611         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6612                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6613                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6614                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6615                                 reg_value =
6616                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6617                                 if (reg_value)
6618                                         gfx_v9_0_ras_error_count(adev,
6619                                                 &gfx_v9_0_edc_counter_regs[i],
6620                                                 j, k, reg_value,
6621                                                 &sec_count, &ded_count);
6622                         }
6623                 }
6624         }
6625
6626         err_data->ce_count += sec_count;
6627         err_data->ue_count += ded_count;
6628
6629         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6630         mutex_unlock(&adev->grbm_idx_mutex);
6631
6632         gfx_v9_0_query_utc_edc_status(adev, err_data);
6633
6634         return 0;
6635 }
6636
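/*
 * Emit an ACQUIRE_MEM packet covering the full address range: invalidate
 * the SH instruction and scalar (K$) caches and the TC/TCL1 caches, and
 * write back dirty TC lines.
 */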
6637 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6638 {
6639         const unsigned int cp_coher_cntl =
6640                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6641                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6642                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6643                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6644                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6645
6646         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6647         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6648         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6649         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6650         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6651         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6652         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6653         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6654 }
6655
6656 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6657         .name = "gfx_v9_0",
6658         .early_init = gfx_v9_0_early_init,
6659         .late_init = gfx_v9_0_late_init,
6660         .sw_init = gfx_v9_0_sw_init,
6661         .sw_fini = gfx_v9_0_sw_fini,
6662         .hw_init = gfx_v9_0_hw_init,
6663         .hw_fini = gfx_v9_0_hw_fini,
6664         .suspend = gfx_v9_0_suspend,
6665         .resume = gfx_v9_0_resume,
6666         .is_idle = gfx_v9_0_is_idle,
6667         .wait_for_idle = gfx_v9_0_wait_for_idle,
6668         .soft_reset = gfx_v9_0_soft_reset,
6669         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6670         .set_powergating_state = gfx_v9_0_set_powergating_state,
6671         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6672 };
6673
6674 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6675         .type = AMDGPU_RING_TYPE_GFX,
6676         .align_mask = 0xff,
6677         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6678         .support_64bit_ptrs = true,
6679         .vmhub = AMDGPU_GFXHUB_0,
6680         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6681         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6682         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6683         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6684                 5 +  /* COND_EXEC */
6685                 7 +  /* PIPELINE_SYNC */
6686                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6687                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6688                 2 + /* VM_FLUSH */
6689                 8 +  /* FENCE for VM_FLUSH */
6690                 20 + /* GDS switch */
6691                 4 + /* double SWITCH_BUFFER,
6692                        the first COND_EXEC jumps to the place just
6693                        prior to this double SWITCH_BUFFER */
6694                 5 + /* COND_EXEC */
6695                 7 + /* HDP_flush */
6696                 4 + /* VGT_flush */
6697                 14 + /* CE_META */
6698                 31 + /* DE_META */
6699                 3 + /* CNTX_CTRL */
6700                 5 + /* HDP_INVL */
6701                 8 + 8 + /* FENCE x2 */
6702                 2 + /* SWITCH_BUFFER */
6703                 7, /* gfx_v9_0_emit_mem_sync */
6704         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6705         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6706         .emit_fence = gfx_v9_0_ring_emit_fence,
6707         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6708         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6709         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6710         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6711         .test_ring = gfx_v9_0_ring_test_ring,
6712         .test_ib = gfx_v9_0_ring_test_ib,
6713         .insert_nop = amdgpu_ring_insert_nop,
6714         .pad_ib = amdgpu_ring_generic_pad_ib,
6715         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6716         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6717         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6718         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6719         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6720         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6721         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6722         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6723         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6724         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6725 };
6726
6727 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6728         .type = AMDGPU_RING_TYPE_COMPUTE,
6729         .align_mask = 0xff,
6730         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6731         .support_64bit_ptrs = true,
6732         .vmhub = AMDGPU_GFXHUB_0,
6733         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6734         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6735         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6736         .emit_frame_size =
6737                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6738                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6739                 5 + /* hdp invalidate */
6740                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6741                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6742                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6743                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6744                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6745                 7, /* gfx_v9_0_emit_mem_sync */
6746         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6747         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6748         .emit_fence = gfx_v9_0_ring_emit_fence,
6749         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6750         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6751         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6752         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6753         .test_ring = gfx_v9_0_ring_test_ring,
6754         .test_ib = gfx_v9_0_ring_test_ib,
6755         .insert_nop = amdgpu_ring_insert_nop,
6756         .pad_ib = amdgpu_ring_generic_pad_ib,
6757         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6758         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6759         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6760         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6761 };
6762
6763 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6764         .type = AMDGPU_RING_TYPE_KIQ,
6765         .align_mask = 0xff,
6766         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6767         .support_64bit_ptrs = true,
6768         .vmhub = AMDGPU_GFXHUB_0,
6769         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6770         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6771         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6772         .emit_frame_size =
6773                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6774                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6775                 5 + /* hdp invalidate */
6776                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6777                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6778                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6779                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6780                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6781         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6782         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6783         .test_ring = gfx_v9_0_ring_test_ring,
6784         .insert_nop = amdgpu_ring_insert_nop,
6785         .pad_ib = amdgpu_ring_generic_pad_ib,
6786         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6787         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6788         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6789         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6790 };
6791
6792 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6793 {
6794         int i;
6795
6796         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6797
6798         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6799                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6800
6801         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6802                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6803 }
6804
6805 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6806         .set = gfx_v9_0_set_eop_interrupt_state,
6807         .process = gfx_v9_0_eop_irq,
6808 };
6809
6810 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6811         .set = gfx_v9_0_set_priv_reg_fault_state,
6812         .process = gfx_v9_0_priv_reg_irq,
6813 };
6814
6815 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6816         .set = gfx_v9_0_set_priv_inst_fault_state,
6817         .process = gfx_v9_0_priv_inst_irq,
6818 };
6819
6820 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6821         .set = gfx_v9_0_set_cp_ecc_error_state,
6822         .process = amdgpu_gfx_cp_ecc_error_irq,
6823 };
6824
6826 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6827 {
6828         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6829         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6830
6831         adev->gfx.priv_reg_irq.num_types = 1;
6832         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6833
6834         adev->gfx.priv_inst_irq.num_types = 1;
6835         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6836
6837         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6838         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6839 }
6840
6841 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6842 {
6843         switch (adev->asic_type) {
6844         case CHIP_VEGA10:
6845         case CHIP_VEGA12:
6846         case CHIP_VEGA20:
6847         case CHIP_RAVEN:
6848         case CHIP_ARCTURUS:
6849         case CHIP_RENOIR:
6850                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6851                 break;
6852         default:
6853                 break;
6854         }
6855 }
6856
6857 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6858 {
6859         /* init ASIC gds info */
6860         switch (adev->asic_type) {
6861         case CHIP_VEGA10:
6862         case CHIP_VEGA12:
6863         case CHIP_VEGA20:
6864                 adev->gds.gds_size = 0x10000;
6865                 break;
6866         case CHIP_RAVEN:
6867         case CHIP_ARCTURUS:
6868                 adev->gds.gds_size = 0x1000;
6869                 break;
6870         default:
6871                 adev->gds.gds_size = 0x10000;
6872                 break;
6873         }
6874
6875         switch (adev->asic_type) {
6876         case CHIP_VEGA10:
6877         case CHIP_VEGA20:
6878                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6879                 break;
6880         case CHIP_VEGA12:
6881                 adev->gds.gds_compute_max_wave_id = 0x27f;
6882                 break;
6883         case CHIP_RAVEN:
6884                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
6885                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6886                 else
6887                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6888                 break;
6889         case CHIP_ARCTURUS:
6890                 adev->gds.gds_compute_max_wave_id = 0xfff;
6891                 break;
6892         default:
6893                 /* this really depends on the chip */
6894                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6895                 break;
6896         }
6897
6898         adev->gds.gws_size = 64;
6899         adev->gds.oa_size = 16;
6900 }
6901
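/*
 * Program the user-requested inactive-CU mask into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */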
6902 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6903                                                  u32 bitmap)
6904 {
6905         u32 data;
6906
6907         if (!bitmap)
6908                 return;
6909
6910         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6911         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6912
6913         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6914 }
6915
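/*
 * Combine the fuse-disabled and user-disabled CU masks and return the
 * complement, limited to the CUs that exist in one SH.
 */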
6916 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6917 {
6918         u32 data, mask;
6919
6920         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6921         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6922
6923         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6924         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6925
6926         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6927
6928         return (~data) & mask;
6929 }
6930
6931 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6932                                  struct amdgpu_cu_info *cu_info)
6933 {
6934         int i, j, k, counter, active_cu_number = 0;
6935         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6936         unsigned disable_masks[4 * 4];
6937
6938         if (!adev || !cu_info)
6939                 return -EINVAL;
6940
6941         /*
6942          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6943          */
6944         if (adev->gfx.config.max_shader_engines *
6945                 adev->gfx.config.max_sh_per_se > 16)
6946                 return -EINVAL;
6947
6948         amdgpu_gfx_parse_disable_cu(disable_masks,
6949                                     adev->gfx.config.max_shader_engines,
6950                                     adev->gfx.config.max_sh_per_se);
6951
6952         mutex_lock(&adev->grbm_idx_mutex);
6953         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6954                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6955                         mask = 1;
6956                         ao_bitmap = 0;
6957                         counter = 0;
6958                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6959                         gfx_v9_0_set_user_cu_inactive_bitmap(
6960                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6961                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6962
6963                         /*
6964                          * The bitmap (and ao_cu_bitmap) in the cu_info
6965                          * structure is a 4x4 array, which suits Vega ASICs
6966                          * with their 4*2 SE/SH layout.
6967                          * For Arcturus, however, the SE/SH layout changes to 8*1.
6968                          * To minimize the impact, we map it onto the current
6969                          * bitmap array as below:
6970                          *    SE4,SH0 --> bitmap[0][1]
6971                          *    SE5,SH0 --> bitmap[1][1]
6972                          *    SE6,SH0 --> bitmap[2][1]
6973                          *    SE7,SH0 --> bitmap[3][1]
6974                          */
6975                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6976
6977                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6978                                 if (bitmap & mask) {
6979                                         if (counter < adev->gfx.config.max_cu_per_sh)
6980                                                 ao_bitmap |= mask;
6981                                         counter++;
6982                                 }
6983                                 mask <<= 1;
6984                         }
6985                         active_cu_number += counter;
6986                         if (i < 2 && j < 2)
6987                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6988                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6989                 }
6990         }
6991         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6992         mutex_unlock(&adev->grbm_idx_mutex);
6993
6994         cu_info->number = active_cu_number;
6995         cu_info->ao_cu_mask = ao_cu_mask;
6996         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6997
6998         return 0;
6999 }
7000
7001 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7002 {
7003         .type = AMD_IP_BLOCK_TYPE_GFX,
7004         .major = 9,
7005         .minor = 0,
7006         .rev = 0,
7007         .funcs = &gfx_v9_0_ip_funcs,
7008 };