/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

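/*
 * Table-entry builder for ras_gfx_subblocks[] below: the first four flags
 * (a..d) are packed into hw_supported_error_type and the last four (e..h)
 * into sw_supported_error_type for the named TA RAS subblock.
 */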
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

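/*
 * Per-instance offsets of the eight RLC_SRM_INDEX_CNTL_{ADDR,DATA}
 * registers, expressed relative to instance 0 (entry 0 is therefore 0).
 */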
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

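/*
 * Indirect GC register write through the RLC (RLCG): the value is posted
 * in SCRATCH_REG0 and the target offset (with bit 31 set as a busy flag)
 * in SCRATCH_REG1, the RLC is kicked via RLC_SPARE_INT, and we then poll
 * SCRATCH_REG1 until the RLC firmware clears the busy flag. Writes to
 * GRBM_GFX_CNTL/GRBM_GFX_INDEX are instead mirrored to SCRATCH_REG2/3
 * and applied directly to the MMIO aperture.
 */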
static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
        static void *scratch_reg0;
        static void *scratch_reg1;
        static void *scratch_reg2;
        static void *scratch_reg3;
        static void *spare_int;
        static uint32_t grbm_cntl;
        static uint32_t grbm_idx;

        scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
        scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
        scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
        scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
        spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

        grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
        grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

        if (amdgpu_sriov_runtime(adev)) {
                pr_err("RLCG register write should not be called during SR-IOV runtime\n");
                return;
        }
749
750         if (offset == grbm_cntl || offset == grbm_idx) {
751                 if (offset  == grbm_cntl)
752                         writel(v, scratch_reg2);
753                 else if (offset == grbm_idx)
754                         writel(v, scratch_reg3);
755
756                 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
757         } else {
758                 uint32_t i = 0;
759                 uint32_t retries = 50000;
760
761                 writel(v, scratch_reg0);
762                 writel(offset | 0x80000000, scratch_reg1);
763                 writel(1, spare_int);
764                 for (i = 0; i < retries; i++) {
765                         u32 tmp;
766
767                         tmp = readl(scratch_reg1);
768                         if (!(tmp & 0x80000000))
769                                 break;
770
771                         udelay(10);
772                 }
                if (i >= retries)
                        pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
        }
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

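/*
 * KIQ (kernel interface queue) PM4 packet builders: the helpers below
 * emit the SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS and
 * INVALIDATE_TLBS packets on the KIQ ring; their packet sizes are
 * declared in gfx_v9_0_kiq_pm4_funcs further down.
 */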
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

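/*
 * Emit a MAP_QUEUES packet for @ring: the CP picks up the queue's MQD at
 * mqd_addr and polls the write pointer at wptr_addr; engine select 4 is
 * used for GFX rings, 0 for compute.
 */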
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

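/*
 * Emit an UNMAP_QUEUES packet; for PREEMPT_QUEUES_NO_UNMAP the trailing
 * address/sequence pair is consumed by the CP as a preemption fence.
 */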
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* for renoir, the common golden settings are not needed */
977         default:
978                 break;
979         }
980
981         if (adev->asic_type != CHIP_ARCTURUS)
982                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
983                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
984 }
985
986 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
987 {
988         adev->gfx.scratch.num_reg = 8;
989         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
990         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
991 }
992
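/*
 * Emit a PACKET3_WRITE_DATA that writes @val to MMIO register @reg from the
 * selected CP engine.  @wc requests a write confirmation, so later packets
 * only execute once the write has landed.  The two address dwords carry the
 * register offset and zero for the high half.
 */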
993 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
994                                        bool wc, uint32_t reg, uint32_t val)
995 {
996         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
997         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
998                                 WRITE_DATA_DST_SEL(0) |
999                                 (wc ? WR_CONFIRM : 0));
1000         amdgpu_ring_write(ring, reg);
1001         amdgpu_ring_write(ring, 0);
1002         amdgpu_ring_write(ring, val);
1003 }
1004
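/*
 * Emit a PACKET3_WAIT_REG_MEM: stall the selected engine until the value at
 * a register (@mem_space == 0, @addr0 is the register) or a dword-aligned
 * memory location (@mem_space == 1, @addr0/@addr1 are the low/high address),
 * masked with @mask, equals @ref.  @inv is the poll interval between retries.
 */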
1005 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1006                                   int mem_space, int opt, uint32_t addr0,
1007                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1008                                   uint32_t inv)
1009 {
1010         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1011         amdgpu_ring_write(ring,
1012                                  /* memory (1) or register (0) */
1013                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1014                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1015                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1016                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1017
1018         if (mem_space)
1019                 BUG_ON(addr0 & 0x3); /* Dword align */
1020         amdgpu_ring_write(ring, addr0);
1021         amdgpu_ring_write(ring, addr1);
1022         amdgpu_ring_write(ring, ref);
1023         amdgpu_ring_write(ring, mask);
1024         amdgpu_ring_write(ring, inv); /* poll interval */
1025 }
1026
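/*
 * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD via
 * MMIO, submit a SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF,
 * then poll until the CP has processed the packet or usec_timeout expires.
 */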
1027 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1028 {
1029         struct amdgpu_device *adev = ring->adev;
1030         uint32_t scratch;
1031         uint32_t tmp = 0;
1032         unsigned i;
1033         int r;
1034
1035         r = amdgpu_gfx_scratch_get(adev, &scratch);
1036         if (r)
1037                 return r;
1038
1039         WREG32(scratch, 0xCAFEDEAD);
1040         r = amdgpu_ring_alloc(ring, 3);
1041         if (r)
1042                 goto error_free_scratch;
1043
1044         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1045         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1046         amdgpu_ring_write(ring, 0xDEADBEEF);
1047         amdgpu_ring_commit(ring);
1048
1049         for (i = 0; i < adev->usec_timeout; i++) {
1050                 tmp = RREG32(scratch);
1051                 if (tmp == 0xDEADBEEF)
1052                         break;
1053                 udelay(1);
1054         }
1055
1056         if (i >= adev->usec_timeout)
1057                 r = -ETIMEDOUT;
1058
1059 error_free_scratch:
1060         amdgpu_gfx_scratch_free(adev, scratch);
1061         return r;
1062 }
1063
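/*
 * Indirect buffer test: build a five-dword IB containing a single
 * WRITE_DATA to a writeback slot seeded with 0xCAFEDEAD, schedule it,
 * wait on its fence and verify that 0xDEADBEEF arrived in memory.
 */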
1064 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1065 {
1066         struct amdgpu_device *adev = ring->adev;
1067         struct amdgpu_ib ib;
1068         struct dma_fence *f = NULL;
1070         unsigned index;
1071         uint64_t gpu_addr;
1072         uint32_t tmp;
1073         long r;
1074
1075         r = amdgpu_device_wb_get(adev, &index);
1076         if (r)
1077                 return r;
1078
1079         gpu_addr = adev->wb.gpu_addr + (index * 4);
1080         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1081         memset(&ib, 0, sizeof(ib));
1082         r = amdgpu_ib_get(adev, NULL, 16,
1083                           AMDGPU_IB_POOL_DIRECT, &ib);
1084         if (r)
1085                 goto err1;
1086
1087         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1088         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1089         ib.ptr[2] = lower_32_bits(gpu_addr);
1090         ib.ptr[3] = upper_32_bits(gpu_addr);
1091         ib.ptr[4] = 0xDEADBEEF;
1092         ib.length_dw = 5;
1093
1094         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1095         if (r)
1096                 goto err2;
1097
1098         r = dma_fence_wait_timeout(f, false, timeout);
1099         if (r == 0) {
1100                 r = -ETIMEDOUT;
1101                 goto err2;
1102         } else if (r < 0) {
1103                 goto err2;
1104         }
1105
1106         tmp = adev->wb.wb[index];
1107         if (tmp == 0xDEADBEEF)
1108                 r = 0;
1109         else
1110                 r = -EINVAL;
1111
1112 err2:
1113         amdgpu_ib_free(adev, &ib, NULL);
1114         dma_fence_put(f);
1115 err1:
1116         amdgpu_device_wb_free(adev, index);
1117         return r;
1118 }
1119
1121 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1122 {
1123         release_firmware(adev->gfx.pfp_fw);
1124         adev->gfx.pfp_fw = NULL;
1125         release_firmware(adev->gfx.me_fw);
1126         adev->gfx.me_fw = NULL;
1127         release_firmware(adev->gfx.ce_fw);
1128         adev->gfx.ce_fw = NULL;
1129         release_firmware(adev->gfx.rlc_fw);
1130         adev->gfx.rlc_fw = NULL;
1131         release_firmware(adev->gfx.mec_fw);
1132         adev->gfx.mec_fw = NULL;
1133         release_firmware(adev->gfx.mec2_fw);
1134         adev->gfx.mec2_fw = NULL;
1135
1136         kfree(adev->gfx.rlc.register_list_format);
1137 }
1138
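/*
 * Parse the v2.1 RLC firmware header extensions: record the versions,
 * sizes and payload pointers for the three save/restore lists (CNTL,
 * GPM and SRM) carried alongside the base RLC ucode.
 */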
1139 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1140 {
1141         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1142
1143         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1144         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1145         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1146         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1147         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1148         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1149         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1150         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1151         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1152         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1153         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1154         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1155         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1156         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1157                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1158 }
1159
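/*
 * me/mec_fw_write_wait records whether the CP firmware is new enough for
 * the combined register write-and-wait handling relied on by later ring
 * code; the per-ASIC minimum ucode/feature versions below gate that fast
 * path, with a one-time warning when the firmware predates them.
 */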
1160 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1161 {
1162         adev->gfx.me_fw_write_wait = false;
1163         adev->gfx.mec_fw_write_wait = false;
1164
1165         if ((adev->asic_type != CHIP_ARCTURUS) &&
1166             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1167             (adev->gfx.mec_feature_version < 46) ||
1168             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1169             (adev->gfx.pfp_feature_version < 46)))
1170                 DRM_WARN_ONCE("CP firmware version too old, please update!\n");
1171
1172         switch (adev->asic_type) {
1173         case CHIP_VEGA10:
1174                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1175                     (adev->gfx.me_feature_version >= 42) &&
1176                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1177                     (adev->gfx.pfp_feature_version >= 42))
1178                         adev->gfx.me_fw_write_wait = true;
1179
1180                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1181                     (adev->gfx.mec_feature_version >= 42))
1182                         adev->gfx.mec_fw_write_wait = true;
1183                 break;
1184         case CHIP_VEGA12:
1185                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1186                     (adev->gfx.me_feature_version >= 44) &&
1187                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1188                     (adev->gfx.pfp_feature_version >= 44))
1189                         adev->gfx.me_fw_write_wait = true;
1190
1191                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1192                     (adev->gfx.mec_feature_version >= 44))
1193                         adev->gfx.mec_fw_write_wait = true;
1194                 break;
1195         case CHIP_VEGA20:
1196                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1197                     (adev->gfx.me_feature_version >= 44) &&
1198                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1199                     (adev->gfx.pfp_feature_version >= 44))
1200                         adev->gfx.me_fw_write_wait = true;
1201
1202                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1203                     (adev->gfx.mec_feature_version >= 44))
1204                         adev->gfx.mec_fw_write_wait = true;
1205                 break;
1206         case CHIP_RAVEN:
1207                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1208                     (adev->gfx.me_feature_version >= 42) &&
1209                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1210                     (adev->gfx.pfp_feature_version >= 42))
1211                         adev->gfx.me_fw_write_wait = true;
1212
1213                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1214                     (adev->gfx.mec_feature_version >= 42))
1215                         adev->gfx.mec_fw_write_wait = true;
1216                 break;
1217         default:
1218                 adev->gfx.me_fw_write_wait = true;
1219                 adev->gfx.mec_fw_write_wait = true;
1220                 break;
1221         }
1222 }
1223
1224 struct amdgpu_gfxoff_quirk {
1225         u16 chip_vendor;
1226         u16 chip_device;
1227         u16 subsys_vendor;
1228         u16 subsys_device;
1229         u8 revision;
1230 };
1231
1232 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1233         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1234         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1235         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1236         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1237         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1238         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1239         { 0, 0, 0, 0, 0 },
1240 };
1241
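/*
 * Return true if the PCI vendor/device, subsystem IDs and revision all
 * match an entry in the quirk list above, i.e. a board known to be
 * unstable with GFXOFF enabled.
 */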
1242 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1243 {
1244         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1245
1246         while (p && p->chip_device != 0) {
1247                 if (pdev->vendor == p->chip_vendor &&
1248                     pdev->device == p->chip_device &&
1249                     pdev->subsystem_vendor == p->subsys_vendor &&
1250                     pdev->subsystem_device == p->subsys_device &&
1251                     pdev->revision == p->revision) {
1252                         return true;
1253                 }
1254                 ++p;
1255         }
1256         return false;
1257 }
1258
1259 static bool is_raven_kicker(struct amdgpu_device *adev)
1260 {
1261         /* SMC firmware 0x41e2b and newer identifies a "kicker" Raven part */
1262         return adev->pm.fw_version >= 0x41e2b;
1265 }
1266
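/*
 * Decide per ASIC whether GFXOFF stays enabled: quirked boards lose it
 * outright, original Raven needs sufficiently new RLC v2.1 firmware to
 * keep it, and Raven/Renoir with GFXOFF intact also gain the GFX
 * powergating flags.
 */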
1267 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1268 {
1269         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1270                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1271
1272         switch (adev->asic_type) {
1273         case CHIP_VEGA10:
1274         case CHIP_VEGA12:
1275         case CHIP_VEGA20:
1276                 break;
1277         case CHIP_RAVEN:
1278                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1279                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1280                     ((!is_raven_kicker(adev) &&
1281                       adev->gfx.rlc_fw_version < 531) ||
1282                      (adev->gfx.rlc_feature_version < 1) ||
1283                      !adev->gfx.rlc.is_rlc_v2_1))
1284                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1285
1286                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1287                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1288                                 AMD_PG_SUPPORT_CP |
1289                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1290                 break;
1291         case CHIP_RENOIR:
1292                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1293                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1294                                 AMD_PG_SUPPORT_CP |
1295                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1296                 break;
1297         default:
1298                 break;
1299         }
1300 }
1301
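/*
 * Fetch and validate the PFP, ME and CE firmware images for @chip_name,
 * record their ucode/feature versions and, when the PSP loads firmware,
 * register each image in adev->firmware.ucode[] with a page-aligned size.
 */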
1302 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1303                                           const char *chip_name)
1304 {
1305         char fw_name[30];
1306         int err;
1307         struct amdgpu_firmware_info *info = NULL;
1308         const struct common_firmware_header *header = NULL;
1309         const struct gfx_firmware_header_v1_0 *cp_hdr;
1310
1311         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1312         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1313         if (err)
1314                 goto out;
1315         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1316         if (err)
1317                 goto out;
1318         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1319         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1320         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1321
1322         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1323         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1324         if (err)
1325                 goto out;
1326         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1327         if (err)
1328                 goto out;
1329         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1330         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1331         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1332
1333         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1334         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1335         if (err)
1336                 goto out;
1337         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1338         if (err)
1339                 goto out;
1340         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1341         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1342         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1343
1344         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1345                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1346                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1347                 info->fw = adev->gfx.pfp_fw;
1348                 header = (const struct common_firmware_header *)info->fw->data;
1349                 adev->firmware.fw_size +=
1350                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1351
1352                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1353                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1354                 info->fw = adev->gfx.me_fw;
1355                 header = (const struct common_firmware_header *)info->fw->data;
1356                 adev->firmware.fw_size +=
1357                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1358
1359                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1360                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1361                 info->fw = adev->gfx.ce_fw;
1362                 header = (const struct common_firmware_header *)info->fw->data;
1363                 adev->firmware.fw_size +=
1364                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1365         }
1366
1367 out:
1368         if (err) {
1369                 dev_err(adev->dev,
1370                         "gfx9: Failed to load firmware \"%s\"\n",
1371                         fw_name);
1372                 release_firmware(adev->gfx.pfp_fw);
1373                 adev->gfx.pfp_fw = NULL;
1374                 release_firmware(adev->gfx.me_fw);
1375                 adev->gfx.me_fw = NULL;
1376                 release_firmware(adev->gfx.ce_fw);
1377                 adev->gfx.ce_fw = NULL;
1378         }
1379         return err;
1380 }
1381
1382 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1383                                           const char *chip_name)
1384 {
1385         char fw_name[30];
1386         int err;
1387         struct amdgpu_firmware_info *info = NULL;
1388         const struct common_firmware_header *header = NULL;
1389         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1390         unsigned int *tmp = NULL;
1391         unsigned int i = 0;
1392         uint16_t version_major;
1393         uint16_t version_minor;
1394         uint32_t smu_version;
1395
1396         /*
1397          * For Picasso on an AM4-socket board, use picasso_rlc_am4.bin
1398          * instead of picasso_rlc.bin.
1399          * Identification by PCI revision ID:
1400          * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
1401          *          or revision >= 0xD8 && revision <= 0xDF;
1402          * otherwise the part is PCO FP5.
1403          */
1404         if (!strcmp(chip_name, "picasso") &&
1405                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1406                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1407                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1408         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1409                 (smu_version >= 0x41e2b))
1410                 /*
1411                  * SMC is loaded by SBIOS on APUs; the SMU version can be read directly.
1412                  */
1413                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1414         else
1415                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1416         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1417         if (err)
1418                 goto out;
1419         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1420         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1421
1422         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1423         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1424         if (version_major == 2 && version_minor == 1)
1425                 adev->gfx.rlc.is_rlc_v2_1 = true;
1426
1427         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1428         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1429         adev->gfx.rlc.save_and_restore_offset =
1430                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1431         adev->gfx.rlc.clear_state_descriptor_offset =
1432                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1433         adev->gfx.rlc.avail_scratch_ram_locations =
1434                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1435         adev->gfx.rlc.reg_restore_list_size =
1436                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1437         adev->gfx.rlc.reg_list_format_start =
1438                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1439         adev->gfx.rlc.reg_list_format_separate_start =
1440                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1441         adev->gfx.rlc.starting_offsets_start =
1442                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1443         adev->gfx.rlc.reg_list_format_size_bytes =
1444                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1445         adev->gfx.rlc.reg_list_size_bytes =
1446                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1447         adev->gfx.rlc.register_list_format =
1448                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1449                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1450         if (!adev->gfx.rlc.register_list_format) {
1451                 err = -ENOMEM;
1452                 goto out;
1453         }
1454
1455         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1456                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1457         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1458                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1459
1460         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1461
1462         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1463                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1464         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1465                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1466
1467         if (adev->gfx.rlc.is_rlc_v2_1)
1468                 gfx_v9_0_init_rlc_ext_microcode(adev);
1469
1470         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1471                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1472                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1473                 info->fw = adev->gfx.rlc_fw;
1474                 header = (const struct common_firmware_header *)info->fw->data;
1475                 adev->firmware.fw_size +=
1476                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1477
1478                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1479                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1480                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1481                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1482                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1483                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1484                         info->fw = adev->gfx.rlc_fw;
1485                         adev->firmware.fw_size +=
1486                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1487
1488                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1489                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1490                         info->fw = adev->gfx.rlc_fw;
1491                         adev->firmware.fw_size +=
1492                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1493
1494                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1495                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1496                         info->fw = adev->gfx.rlc_fw;
1497                         adev->firmware.fw_size +=
1498                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1499                 }
1500         }
1501
1502 out:
1503         if (err) {
1504                 dev_err(adev->dev,
1505                         "gfx9: Failed to load firmware \"%s\"\n",
1506                         fw_name);
1507                 release_firmware(adev->gfx.rlc_fw);
1508                 adev->gfx.rlc_fw = NULL;
1509         }
1510         return err;
1511 }
1512
1513 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1514                                           const char *chip_name)
1515 {
1516         char fw_name[30];
1517         int err;
1518         struct amdgpu_firmware_info *info = NULL;
1519         const struct common_firmware_header *header = NULL;
1520         const struct gfx_firmware_header_v1_0 *cp_hdr;
1521
1522         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1523         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1524         if (err)
1525                 goto out;
1526         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1527         if (err)
1528                 goto out;
1529         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1530         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1531         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1532
1534         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1535         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1536         if (!err) {
1537                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1538                 if (err)
1539                         goto out;
1540                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1541                                 adev->gfx.mec2_fw->data;
1542                 adev->gfx.mec2_fw_version =
1543                                 le32_to_cpu(cp_hdr->header.ucode_version);
1544                 adev->gfx.mec2_feature_version =
1545                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1546         } else {
1547                 err = 0;
1548                 adev->gfx.mec2_fw = NULL;
1549         }
1550
1551         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1552                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1553                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1554                 info->fw = adev->gfx.mec_fw;
1555                 header = (const struct common_firmware_header *)info->fw->data;
1556                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1557                 adev->firmware.fw_size +=
1558                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1559
1560                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1561                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1562                 info->fw = adev->gfx.mec_fw;
1563                 adev->firmware.fw_size +=
1564                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1565
1566                 if (adev->gfx.mec2_fw) {
1567                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1568                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1569                         info->fw = adev->gfx.mec2_fw;
1570                         header = (const struct common_firmware_header *)info->fw->data;
1571                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1572                         adev->firmware.fw_size +=
1573                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1574
1575                         /* TODO: Determine if MEC2 JT FW loading can be
1576                          * removed for all GFX v9 ASICs and above. */
1577                         if (adev->asic_type != CHIP_ARCTURUS &&
1578                             adev->asic_type != CHIP_RENOIR) {
1579                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1580                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1581                                 info->fw = adev->gfx.mec2_fw;
1582                                 adev->firmware.fw_size +=
1583                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1584                                         PAGE_SIZE);
1585                         }
1586                 }
1587         }
1588
1589 out:
1590         gfx_v9_0_check_if_need_gfxoff(adev);
1591         gfx_v9_0_check_fw_write_wait(adev);
1592         if (err) {
1593                 dev_err(adev->dev,
1594                         "gfx9: Failed to load firmware \"%s\"\n",
1595                         fw_name);
1596                 release_firmware(adev->gfx.mec_fw);
1597                 adev->gfx.mec_fw = NULL;
1598                 release_firmware(adev->gfx.mec2_fw);
1599                 adev->gfx.mec2_fw = NULL;
1600         }
1601         return err;
1602 }
1603
1604 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1605 {
1606         const char *chip_name;
1607         int r;
1608
1609         DRM_DEBUG("\n");
1610
1611         switch (adev->asic_type) {
1612         case CHIP_VEGA10:
1613                 chip_name = "vega10";
1614                 break;
1615         case CHIP_VEGA12:
1616                 chip_name = "vega12";
1617                 break;
1618         case CHIP_VEGA20:
1619                 chip_name = "vega20";
1620                 break;
1621         case CHIP_RAVEN:
1622                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1623                         chip_name = "raven2";
1624                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1625                         chip_name = "picasso";
1626                 else
1627                         chip_name = "raven";
1628                 break;
1629         case CHIP_ARCTURUS:
1630                 chip_name = "arcturus";
1631                 break;
1632         case CHIP_RENOIR:
1633                 chip_name = "renoir";
1634                 break;
1635         default:
1636                 BUG();
1637         }
1638
1639         /* No CPG in Arcturus */
1640         if (adev->asic_type != CHIP_ARCTURUS) {
1641                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1642                 if (r)
1643                         return r;
1644         }
1645
1646         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1647         if (r)
1648                 return r;
1649
1650         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1651         if (r)
1652                 return r;
1653
1654         return 0;
1655 }
1656
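/*
 * Size of the clear-state buffer in dwords: 2 for the preamble begin,
 * 3 for context control, 2 + reg_count per SECT_CONTEXT extent, then
 * 2 for the preamble end and 2 for the final CLEAR_STATE packet.
 */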
1657 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1658 {
1659         u32 count = 0;
1660         const struct cs_section_def *sect = NULL;
1661         const struct cs_extent_def *ext = NULL;
1662
1663         /* begin clear state */
1664         count += 2;
1665         /* context control state */
1666         count += 3;
1667
1668         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1669                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1670                         if (sect->id == SECT_CONTEXT)
1671                                 count += 2 + ext->reg_count;
1672                         else
1673                                 return 0;
1674                 }
1675         }
1676
1677         /* end clear state */
1678         count += 2;
1679         /* clear state */
1680         count += 2;
1681
1682         return count;
1683 }
1684
1685 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1686                                     volatile u32 *buffer)
1687 {
1688         u32 count = 0, i;
1689         const struct cs_section_def *sect = NULL;
1690         const struct cs_extent_def *ext = NULL;
1691
1692         if (adev->gfx.rlc.cs_data == NULL)
1693                 return;
1694         if (buffer == NULL)
1695                 return;
1696
1697         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1698         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1699
1700         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1701         buffer[count++] = cpu_to_le32(0x80000000);
1702         buffer[count++] = cpu_to_le32(0x80000000);
1703
1704         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1705                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1706                         if (sect->id == SECT_CONTEXT) {
1707                                 buffer[count++] =
1708                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1709                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1710                                                 PACKET3_SET_CONTEXT_REG_START);
1711                                 for (i = 0; i < ext->reg_count; i++)
1712                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1713                         } else {
1714                                 return;
1715                         }
1716                 }
1717         }
1718
1719         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1720         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1721
1722         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1723         buffer[count++] = cpu_to_le32(0);
1724 }
1725
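/*
 * Walk the CU bitmap per SE/SH and mark the first N CUs as always-on for
 * load balancing (4 on APUs, 8 on Vega12, 12 otherwise); the first two of
 * those are additionally latched as the powergating always-on set.
 */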
1726 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1727 {
1728         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1729         uint32_t pg_always_on_cu_num = 2;
1730         uint32_t always_on_cu_num;
1731         uint32_t i, j, k;
1732         uint32_t mask, cu_bitmap, counter;
1733
1734         if (adev->flags & AMD_IS_APU)
1735                 always_on_cu_num = 4;
1736         else if (adev->asic_type == CHIP_VEGA12)
1737                 always_on_cu_num = 8;
1738         else
1739                 always_on_cu_num = 12;
1740
1741         mutex_lock(&adev->grbm_idx_mutex);
1742         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1743                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1744                         mask = 1;
1745                         cu_bitmap = 0;
1746                         counter = 0;
1747                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1748
1749                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1750                                 if (cu_info->bitmap[i][j] & mask) {
1751                                         if (counter == pg_always_on_cu_num)
1752                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1753                                         if (counter < always_on_cu_num)
1754                                                 cu_bitmap |= mask;
1755                                         else
1756                                                 break;
1757                                         counter++;
1758                                 }
1759                                 mask <<= 1;
1760                         }
1761
1762                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1763                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1764                 }
1765         }
1766         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1767         mutex_unlock(&adev->grbm_idx_mutex);
1768 }
1769
1770 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1771 {
1772         uint32_t data;
1773
1774         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1775         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1778         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1779
1780         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1781         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1782
1783         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1784         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1785
1786         mutex_lock(&adev->grbm_idx_mutex);
1787         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1788         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1789         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1790
1791         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1792         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1793         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1794         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1795         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1796
1797         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1798         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1799         data &= 0x0000FFFF;
1800         data |= 0x00C00000;
1801         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1802
1803         /*
1804          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1805          * programmed in gfx_v9_0_init_always_on_cu_mask()
1806          */
1807
1808         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1809          * but is used here as part of the RLC_LB_CNTL configuration */
1810         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1811         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1812         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1813         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1814         mutex_unlock(&adev->grbm_idx_mutex);
1815
1816         gfx_v9_0_init_always_on_cu_mask(adev);
1817 }
1818
1819 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1820 {
1821         uint32_t data;
1822
1823         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1824         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1827         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1828
1829         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1830         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1831
1832         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1833         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1834
1835         mutex_lock(&adev->grbm_idx_mutex);
1836         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1837         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1838         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1839
1840         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1841         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1842         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1843         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1844         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1845
1846         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1847         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1848         data &= 0x0000FFFF;
1849         data |= 0x00C00000;
1850         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1851
1852         /*
1853          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1854          * programmed in gfx_v9_0_init_always_on_cu_mask()
1855          */
1856
1857         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1858          * but is used here as part of the RLC_LB_CNTL configuration */
1859         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1860         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1861         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1862         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1863         mutex_unlock(&adev->grbm_idx_mutex);
1864
1865         gfx_v9_0_init_always_on_cu_mask(adev);
1866 }
1867
1868 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1869 {
1870         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1871 }
1872
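/* Number of CP jump tables on GFX v9; presumably one per CP engine. */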
1873 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1874 {
1875         return 5;
1876 }
1877
1878 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1879 {
1880         const struct cs_section_def *cs_data;
1881         int r;
1882
1883         adev->gfx.rlc.cs_data = gfx9_cs_data;
1884
1885         cs_data = adev->gfx.rlc.cs_data;
1886
1887         if (cs_data) {
1888                 /* init clear state block */
1889                 r = amdgpu_gfx_rlc_init_csb(adev);
1890                 if (r)
1891                         return r;
1892         }
1893
1894         if (adev->flags & AMD_IS_APU) {
1895                 /* TODO: double check the cp_table_size for RV */
1896                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1897                 r = amdgpu_gfx_rlc_init_cpt(adev);
1898                 if (r)
1899                         return r;
1900         }
1901
1902         switch (adev->asic_type) {
1903         case CHIP_RAVEN:
1904                 gfx_v9_0_init_lbpw(adev);
1905                 break;
1906         case CHIP_VEGA20:
1907                 gfx_v9_4_init_lbpw(adev);
1908                 break;
1909         default:
1910                 break;
1911         }
1912
1913         /* init spm vmid with 0xf */
1914         if (adev->gfx.rlc.funcs->update_spm_vmid)
1915                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1916
1917         return 0;
1918 }
1919
1920 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1921 {
1922         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1923         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1924 }
1925
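/*
 * Allocate the MEC resources: take ownership of the compute queues,
 * create a VRAM buffer holding one GFX9_MEC_HPD_SIZE EOP area per
 * compute ring, and copy the MEC microcode into a GTT buffer object.
 */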
1926 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1927 {
1928         int r;
1929         u32 *hpd;
1930         const __le32 *fw_data;
1931         unsigned fw_size;
1932         u32 *fw;
1933         size_t mec_hpd_size;
1934
1935         const struct gfx_firmware_header_v1_0 *mec_hdr;
1936
1937         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1938
1939         /* take ownership of the relevant compute queues */
1940         amdgpu_gfx_compute_queue_acquire(adev);
1941         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1942
1943         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1944                                       AMDGPU_GEM_DOMAIN_VRAM,
1945                                       &adev->gfx.mec.hpd_eop_obj,
1946                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1947                                       (void **)&hpd);
1948         if (r) {
1949                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1950                 gfx_v9_0_mec_fini(adev);
1951                 return r;
1952         }
1953
1954         memset(hpd, 0, mec_hpd_size);
1955
1956         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1957         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1958
1959         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1960
1961         fw_data = (const __le32 *)
1962                 (adev->gfx.mec_fw->data +
1963                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1964         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1965
1966         r = amdgpu_bo_create_reserved(adev, fw_size,
1967                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1968                                       &adev->gfx.mec.mec_fw_obj,
1969                                       &adev->gfx.mec.mec_fw_gpu_addr,
1970                                       (void **)&fw);
1971         if (r) {
1972                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1973                 gfx_v9_0_mec_fini(adev);
1974                 return r;
1975         }
1976
1977         memcpy(fw, fw_data, fw_size);
1978
1979         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1980         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1981
1982         return 0;
1983 }
1984
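/*
 * Indirect SQ register access: program SQ_IND_INDEX with the wave, SIMD
 * and register address (FORCE_READ set), then read the value back through
 * SQ_IND_DATA.  wave_read_regs() adds AUTO_INCR to stream @num dwords.
 */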
1985 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1986 {
1987         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1988                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1989                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1990                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1991                 (SQ_IND_INDEX__FORCE_READ_MASK));
1992         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1993 }
1994
1995 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1996                            uint32_t wave, uint32_t thread,
1997                            uint32_t regno, uint32_t num, uint32_t *out)
1998 {
1999         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2000                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2001                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2002                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2003                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2004                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2005                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2006         while (num--)
2007                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2008 }
2009
2010 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2011 {
2012         /* type 1 wave data */
2013         dst[(*no_fields)++] = 1;
2014         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2015         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2016         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2017         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2018         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2019         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2020         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2021         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2022         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2023         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2024         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2025         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2026         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2027         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2028 }
2029
2030 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2031                                      uint32_t wave, uint32_t start,
2032                                      uint32_t size, uint32_t *dst)
2033 {
2034         wave_read_regs(
2035                 adev, simd, wave, 0,
2036                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2037 }
2038
2039 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2040                                      uint32_t wave, uint32_t thread,
2041                                      uint32_t start, uint32_t size,
2042                                      uint32_t *dst)
2043 {
2044         wave_read_regs(
2045                 adev, simd, wave, thread,
2046                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2047 }
2048
2049 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2050                                   u32 me, u32 pipe, u32 q, u32 vm)
2051 {
2052         soc15_grbm_select(adev, me, pipe, q, vm);
2053 }
2054
2055 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2056         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2057         .select_se_sh = &gfx_v9_0_select_se_sh,
2058         .read_wave_data = &gfx_v9_0_read_wave_data,
2059         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2060         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2061         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2062         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2063         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2064         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2065 };
2066
2067 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2068         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2069         .select_se_sh = &gfx_v9_0_select_se_sh,
2070         .read_wave_data = &gfx_v9_0_read_wave_data,
2071         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2072         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2073         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2074         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2075         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2076         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2077 };
2078
2079 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2080 {
2081         u32 gb_addr_config;
2082         int err;
2083
2084         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2085
2086         switch (adev->asic_type) {
2087         case CHIP_VEGA10:
2088                 adev->gfx.config.max_hw_contexts = 8;
2089                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2090                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2091                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2092                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2093                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2094                 break;
2095         case CHIP_VEGA12:
2096                 adev->gfx.config.max_hw_contexts = 8;
2097                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2098                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2099                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2100                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2101                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2102                 DRM_INFO("fix gfx.config for vega12\n");
2103                 break;
2104         case CHIP_VEGA20:
2105                 adev->gfx.config.max_hw_contexts = 8;
2106                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2107                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2108                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2109                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2110                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2111                 gb_addr_config &= ~0xf3e777ff;
2112                 gb_addr_config |= 0x22014042;
2113                 /* check vbios table if gpu info is not available */
2114                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2115                 if (err)
2116                         return err;
2117                 break;
2118         case CHIP_RAVEN:
2119                 adev->gfx.config.max_hw_contexts = 8;
2120                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2121                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2122                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2123                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2124                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2125                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2126                 else
2127                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2128                 break;
2129         case CHIP_ARCTURUS:
2130                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2131                 adev->gfx.config.max_hw_contexts = 8;
2132                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2133                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2134                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2135                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2136                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2137                 gb_addr_config &= ~0xf3e777ff;
2138                 gb_addr_config |= 0x22014042;
2139                 break;
2140         case CHIP_RENOIR:
2141                 adev->gfx.config.max_hw_contexts = 8;
2142                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2143                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2144                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2145                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2146                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2147                 gb_addr_config &= ~0xf3e777ff;
2148                 gb_addr_config |= 0x22010042;
2149                 break;
2150         default:
2151                 BUG();
2152                 break;
2153         }
2154
2155         adev->gfx.config.gb_addr_config = gb_addr_config;
2156
2157         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2158                         REG_GET_FIELD(
2159                                         adev->gfx.config.gb_addr_config,
2160                                         GB_ADDR_CONFIG,
2161                                         NUM_PIPES);
2162
2163         adev->gfx.config.max_tile_pipes =
2164                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2165
2166         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2167                         REG_GET_FIELD(
2168                                         adev->gfx.config.gb_addr_config,
2169                                         GB_ADDR_CONFIG,
2170                                         NUM_BANKS);
2171         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2172                         REG_GET_FIELD(
2173                                         adev->gfx.config.gb_addr_config,
2174                                         GB_ADDR_CONFIG,
2175                                         MAX_COMPRESSED_FRAGS);
2176         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2177                         REG_GET_FIELD(
2178                                         adev->gfx.config.gb_addr_config,
2179                                         GB_ADDR_CONFIG,
2180                                         NUM_RB_PER_SE);
2181         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2182                         REG_GET_FIELD(
2183                                         adev->gfx.config.gb_addr_config,
2184                                         GB_ADDR_CONFIG,
2185                                         NUM_SHADER_ENGINES);
2186         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2187                         REG_GET_FIELD(
2188                                         adev->gfx.config.gb_addr_config,
2189                                         GB_ADDR_CONFIG,
2190                                         PIPE_INTERLEAVE_SIZE));
2191
2192         return 0;
2193 }
2194
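/*
 * Initialize one compute ring: map @ring_id onto its MEC/pipe/queue triple
 * (mec0 is me1), point it at its per-ring doorbell and EOP slice, pick the
 * matching EOP interrupt source and derive the hardware queue priority.
 */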
2195 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2196                                       int mec, int pipe, int queue)
2197 {
2198         int r;
2199         unsigned irq_type;
2200         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2201         unsigned int hw_prio;
2202
2205         /* mec0 is me1 */
2206         ring->me = mec + 1;
2207         ring->pipe = pipe;
2208         ring->queue = queue;
2209
2210         ring->ring_obj = NULL;
2211         ring->use_doorbell = true;
2212         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2213         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2214                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2215         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2216
2217         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2218                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2219                 + ring->pipe;
2220         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
2221                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2222         /* type-2 packets are deprecated on MEC, use type-3 instead */
2223         r = amdgpu_ring_init(adev, ring, 1024,
2224                              &adev->gfx.eop_irq, irq_type, hw_prio);
2225         if (r)
2226                 return r;
2227
2229         return 0;
2230 }
2231
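     /*
      * sw_init allocates everything the block needs before the hardware
      * is touched: interrupt sources (EOP, privileged reg/instruction
      * faults, ECC and FUE errors), microcode, the RLC and MEC buffer
      * objects, the gfx and compute rings, the KIQ and the per-queue
      * MQDs.
      */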
2232 static int gfx_v9_0_sw_init(void *handle)
2233 {
2234         int i, j, k, r, ring_id;
2235         struct amdgpu_ring *ring;
2236         struct amdgpu_kiq *kiq;
2237         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2238
2239         switch (adev->asic_type) {
2240         case CHIP_VEGA10:
2241         case CHIP_VEGA12:
2242         case CHIP_VEGA20:
2243         case CHIP_RAVEN:
2244         case CHIP_ARCTURUS:
2245         case CHIP_RENOIR:
2246                 adev->gfx.mec.num_mec = 2;
2247                 break;
2248         default:
2249                 adev->gfx.mec.num_mec = 1;
2250                 break;
2251         }
2252
2253         adev->gfx.mec.num_pipe_per_mec = 4;
2254         adev->gfx.mec.num_queue_per_pipe = 8;
2255
2256         /* EOP Event */
2257         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2258         if (r)
2259                 return r;
2260
2261         /* Privileged reg */
2262         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2263                               &adev->gfx.priv_reg_irq);
2264         if (r)
2265                 return r;
2266
2267         /* Privileged inst */
2268         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2269                               &adev->gfx.priv_inst_irq);
2270         if (r)
2271                 return r;
2272
2273         /* ECC error */
2274         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2275                               &adev->gfx.cp_ecc_error_irq);
2276         if (r)
2277                 return r;
2278
2279         /* FUE error */
2280         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2281                               &adev->gfx.cp_ecc_error_irq);
2282         if (r)
2283                 return r;
2284
2285         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2286
2287         gfx_v9_0_scratch_init(adev);
2288
2289         r = gfx_v9_0_init_microcode(adev);
2290         if (r) {
2291                 DRM_ERROR("Failed to load gfx firmware!\n");
2292                 return r;
2293         }
2294
2295         r = adev->gfx.rlc.funcs->init(adev);
2296         if (r) {
2297                 DRM_ERROR("Failed to init rlc BOs!\n");
2298                 return r;
2299         }
2300
2301         r = gfx_v9_0_mec_init(adev);
2302         if (r) {
2303                 DRM_ERROR("Failed to init MEC BOs!\n");
2304                 return r;
2305         }
2306
2307         /* set up the gfx ring */
2308         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2309                 ring = &adev->gfx.gfx_ring[i];
2310                 ring->ring_obj = NULL;
2311                 if (!i)
2312                         sprintf(ring->name, "gfx");
2313                 else
2314                         sprintf(ring->name, "gfx_%d", i);
2315                 ring->use_doorbell = true;
2316                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2317                 r = amdgpu_ring_init(adev, ring, 1024,
2318                                      &adev->gfx.eop_irq,
2319                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2320                                      AMDGPU_RING_PRIO_DEFAULT);
2321                 if (r)
2322                         return r;
2323         }
2324
2325         /* set up the compute queues - allocate horizontally across pipes */
2326         ring_id = 0;
2327         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2328                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2329                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2330                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2331                                         continue;
2332
2333                                 r = gfx_v9_0_compute_ring_init(adev,
2334                                                                ring_id,
2335                                                                i, k, j);
2336                                 if (r)
2337                                         return r;
2338
2339                                 ring_id++;
2340                         }
2341                 }
2342         }
2343
2344         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2345         if (r) {
2346                 DRM_ERROR("Failed to init KIQ BOs!\n");
2347                 return r;
2348         }
2349
2350         kiq = &adev->gfx.kiq;
2351         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2352         if (r)
2353                 return r;
2354
2355         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2356         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2357         if (r)
2358                 return r;
2359
2360         adev->gfx.ce_ram_size = 0x8000;
2361
2362         r = gfx_v9_0_gpu_early_init(adev);
2363         if (r)
2364                 return r;
2365
2366         return 0;
2367 }
2368
2369
2370 static int gfx_v9_0_sw_fini(void *handle)
2371 {
2372         int i;
2373         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2374
2375         amdgpu_gfx_ras_fini(adev);
2376
2377         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2378                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2379         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2380                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2381
2382         amdgpu_gfx_mqd_sw_fini(adev);
2383         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2384         amdgpu_gfx_kiq_fini(adev);
2385
2386         gfx_v9_0_mec_fini(adev);
2387         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2388         if (adev->flags & AMD_IS_APU) {
2389                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2390                                 &adev->gfx.rlc.cp_table_gpu_addr,
2391                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2392         }
2393         gfx_v9_0_free_microcode(adev);
2394
2395         return 0;
2396 }
2397
2398
2399 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2400 {
2401         /* TODO */
2402 }
2403
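     /*
      * Selects which shader engine (SE) / shader array (SH) / instance
      * subsequent indexed register accesses target. Passing 0xffffffff
      * for a field sets the corresponding *_BROADCAST_WRITES bit
      * instead, so writes go to all units at once. Callers hold
      * grbm_idx_mutex around select/access/deselect sequences.
      */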
2404 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2405 {
2406         u32 data;
2407
2408         if (instance == 0xffffffff)
2409                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2410         else
2411                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2412
2413         if (se_num == 0xffffffff)
2414                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2415         else
2416                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2417
2418         if (sh_num == 0xffffffff)
2419                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2420         else
2421                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2422
2423         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2424 }
2425
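     /*
      * Returns a bitmap of render backends usable in the currently
      * selected SE/SH: the harvest fuses (CC_RB_BACKEND_DISABLE) and any
      * user overrides (GC_USER_RB_BACKEND_DISABLE) are OR'ed together
      * and inverted, then masked down to the number of RBs one SH can
      * have.
      */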
2426 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2427 {
2428         u32 data, mask;
2429
2430         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2431         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2432
2433         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2434         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2435
2436         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2437                                          adev->gfx.config.max_sh_per_se);
2438
2439         return (~data) & mask;
2440 }
2441
2442 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2443 {
2444         int i, j;
2445         u32 data;
2446         u32 active_rbs = 0;
2447         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2448                                         adev->gfx.config.max_sh_per_se;
2449
2450         mutex_lock(&adev->grbm_idx_mutex);
2451         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2452                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2453                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2454                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2455                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2456                                                rb_bitmap_width_per_sh);
2457                 }
2458         }
2459         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2460         mutex_unlock(&adev->grbm_idx_mutex);
2461
2462         adev->gfx.config.backend_enable_mask = active_rbs;
2463         adev->gfx.config.num_rbs = hweight32(active_rbs);
2464 }
2465
2466 #define DEFAULT_SH_MEM_BASES    (0x6000)
2467 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2468 {
2469         int i;
2470         uint32_t sh_mem_config;
2471         uint32_t sh_mem_bases;
2472
2473         /*
2474          * Configure apertures:
2475          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2476          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2477          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (256TB)
2478          */
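             /*
              * Each 16-bit half of SH_MEM_BASES (PRIVATE_BASE low,
              * SHARED_BASE high) holds bits [63:48] of an aperture base,
              * so 0x6000 here selects 0x6000'0000'0000'0000 as laid out
              * above.
              */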
2479         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2480
2481         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2482                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2483                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2484
2485         mutex_lock(&adev->srbm_mutex);
2486         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2487                 soc15_grbm_select(adev, 0, 0, 0, i);
2488                 /* CP and shaders */
2489                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2490                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2491         }
2492         soc15_grbm_select(adev, 0, 0, 0, 0);
2493         mutex_unlock(&adev->srbm_mutex);
2494
2495         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2496            access. These should be enabled by FW for target VMIDs. */
2497         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2498                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2499                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2500                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2501                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2502         }
2503 }
2504
2505 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2506 {
2507         int vmid;
2508
2509         /*
2510          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2511          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2512          * the driver can enable them for graphics. VMID0 should maintain
2513          * access so that HWS firmware can save/restore entries.
2514          */
2515         for (vmid = 1; vmid < 16; vmid++) {
2516                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2517                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2518                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2519                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2520         }
2521 }
2522
2523 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2524 {
2525         uint32_t tmp;
2526
2527         switch (adev->asic_type) {
2528         case CHIP_ARCTURUS:
2529                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2530                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2531                                         DISABLE_BARRIER_WAITCNT, 1);
2532                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2533                 break;
2534         default:
2535                 break;
2536         }
2537 }
2538
2539 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2540 {
2541         u32 tmp;
2542         int i;
2543
2544         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2545
2546         gfx_v9_0_tiling_mode_table_init(adev);
2547
2548         gfx_v9_0_setup_rb(adev);
2549         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2550         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2551
2552         /* XXX SH_MEM regs */
2553         /* where to put LDS, scratch, GPUVM in FSA64 space */
2554         mutex_lock(&adev->srbm_mutex);
2555         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2556                 soc15_grbm_select(adev, 0, 0, 0, i);
2557                 /* CP and shaders */
2558                 if (i == 0) {
2559                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2560                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2561                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2562                                             !!amdgpu_noretry);
2563                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2564                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2565                 } else {
2566                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2567                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2568                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2569                                             !!amdgpu_noretry);
2570                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2571                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2572                                 (adev->gmc.private_aperture_start >> 48));
2573                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2574                                 (adev->gmc.shared_aperture_start >> 48));
2575                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2576                 }
2577         }
2578         soc15_grbm_select(adev, 0, 0, 0, 0);
2579
2580         mutex_unlock(&adev->srbm_mutex);
2581
2582         gfx_v9_0_init_compute_vmid(adev);
2583         gfx_v9_0_init_gds_vmid(adev);
2584         gfx_v9_0_init_sq_config(adev);
2585 }
2586
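     /*
      * Busy-waits (up to adev->usec_timeout microseconds per unit) until
      * the RLC serdes report idle: first the per-CU masters for every
      * SE/SH combination, then the non-CU masters (SE, GC and TC).
      */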
2587 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2588 {
2589         u32 i, j, k;
2590         u32 mask;
2591
2592         mutex_lock(&adev->grbm_idx_mutex);
2593         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2594                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2595                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2596                         for (k = 0; k < adev->usec_timeout; k++) {
2597                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2598                                         break;
2599                                 udelay(1);
2600                         }
2601                         if (k == adev->usec_timeout) {
2602                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2603                                                       0xffffffff, 0xffffffff);
2604                                 mutex_unlock(&adev->grbm_idx_mutex);
2605                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2606                                          i, j);
2607                                 return;
2608                         }
2609                 }
2610         }
2611         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2612         mutex_unlock(&adev->grbm_idx_mutex);
2613
2614         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2615                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2616                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2617                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2618         for (k = 0; k < adev->usec_timeout; k++) {
2619                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2620                         break;
2621                 udelay(1);
2622         }
2623 }
2624
2625 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2626                                                bool enable)
2627 {
2628         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2629
2630         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2631         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2632         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2633         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2634
2635         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2636 }
2637
2638 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2639 {
2640         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2641         /* csib */
2642         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2643                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2644         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2645                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2646         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2647                         adev->gfx.rlc.clear_state_size);
2648 }
2649
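     /*
      * Walks the firmware-provided register_list_format blob. As
      * consumed here, each indirect block starting at indirect_offset is
      * a run of three-dword records terminated by a 0xFFFFFFFF sentinel,
      * with the third dword of each record naming an indirectly-accessed
      * register. The helper records where every block starts and builds
      * a table of the unique indirect register offsets encountered.
      */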
2650 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2651                                 int indirect_offset,
2652                                 int list_size,
2653                                 int *unique_indirect_regs,
2654                                 int unique_indirect_reg_count,
2655                                 int *indirect_start_offsets,
2656                                 int *indirect_start_offsets_count,
2657                                 int max_start_offsets_count)
2658 {
2659         int idx;
2660
2661         for (; indirect_offset < list_size; indirect_offset++) {
2662                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2663                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2664                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2665
2666                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2667                         indirect_offset += 2;
2668
2669                         /* look for the matching index */
2670                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2671                                 if (unique_indirect_regs[idx] ==
2672                                         register_list_format[indirect_offset] ||
2673                                         !unique_indirect_regs[idx])
2674                                         break;
2675                         }
2676
2677                         BUG_ON(idx >= unique_indirect_reg_count);
2678
2679                         if (!unique_indirect_regs[idx])
2680                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2681
2682                         indirect_offset++;
2683                 }
2684         }
2685 }
2686
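     /*
      * Programs the RLC save/restore machine from the firmware register
      * lists: the direct-register restore table goes into save/restore
      * ARAM through the RLC_SRM_ARAM_ADDR/DATA pair, while the format
      * list, the list sizes, the block start offsets and the unique
      * indirect register table are written into RLC GPM scratch. The
      * ADDR registers auto-increment on each DATA write (forced on
      * below).
      */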
2687 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2688 {
2689         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2690         int unique_indirect_reg_count = 0;
2691
2692         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2693         int indirect_start_offsets_count = 0;
2694
2695         int list_size = 0;
2696         int i = 0, j = 0;
2697         u32 tmp = 0;
2698
2699         u32 *register_list_format =
2700                 kmemdup(adev->gfx.rlc.register_list_format,
2701                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2702         if (!register_list_format)
2703                 return -ENOMEM;
2704
2705         /* setup unique_indirect_regs array and indirect_start_offsets array */
2706         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2707         gfx_v9_1_parse_ind_reg_list(register_list_format,
2708                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2709                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2710                                     unique_indirect_regs,
2711                                     unique_indirect_reg_count,
2712                                     indirect_start_offsets,
2713                                     &indirect_start_offsets_count,
2714                                     ARRAY_SIZE(indirect_start_offsets));
2715
2716         /* enable auto inc in case it is disabled */
2717         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2718         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2719         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2720
2721         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2722         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2723                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2724         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2725                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2726                         adev->gfx.rlc.register_restore[i]);
2727
2728         /* load indirect register */
2729         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2730                 adev->gfx.rlc.reg_list_format_start);
2731
2732         /* direct register portion */
2733         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2734                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2735                         register_list_format[i]);
2736
2737         /* indirect register portion */
2738         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2739                 if (register_list_format[i] == 0xFFFFFFFF) {
2740                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2741                         continue;
2742                 }
2743
2744                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2745                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2746
2747                 for (j = 0; j < unique_indirect_reg_count; j++) {
2748                         if (register_list_format[i] == unique_indirect_regs[j]) {
2749                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2750                                 break;
2751                         }
2752                 }
2753
2754                 BUG_ON(j >= unique_indirect_reg_count);
2755
2756                 i++;
2757         }
2758
2759         /* set save/restore list size */
2760         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2761         list_size = list_size >> 1;
2762         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2763                 adev->gfx.rlc.reg_restore_list_size);
2764         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2765
2766         /* write the starting offsets to RLC scratch ram */
2767         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2768                 adev->gfx.rlc.starting_offsets_start);
2769         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2770                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2771                        indirect_start_offsets[i]);
2772
2773         /* load unique indirect regs */
2774         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2775                 if (unique_indirect_regs[i] != 0) {
2776                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2777                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2778                                unique_indirect_regs[i] & 0x3FFFF);
2779
2780                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2781                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2782                                unique_indirect_regs[i] >> 20);
2783                 }
2784         }
2785
2786         kfree(register_list_format);
2787         return 0;
2788 }
2789
2790 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2791 {
2792         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2793 }
2794
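     /*
      * Hands control of coarse-grain power gating (CGPG) to the GFX IP,
      * or takes it back, via PWR_MISC_CNTL_STATUS. On enable, the
      * PWR_GFXOFF_STATUS field is also set to 2, which presumably
      * reports the current GFXOFF state to the power firmware; writes
      * are skipped whenever the register already holds the target value.
      */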
2795 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2796                                              bool enable)
2797 {
2798         uint32_t data = 0;
2799         uint32_t default_data = 0;
2800
2801         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2802         if (enable) {
2803                 /* enable GFXIP control over CGPG */
2804                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2805                 if (default_data != data)
2806                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2807
2808                 /* update status */
2809                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2810                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2811                 if (default_data != data)
2812                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2813         } else {
2814                 /* disable GFXIP control over CGPG */
2815                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2816                 if (default_data != data)
2817                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2818         }
2819 }
2820
2821 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2822 {
2823         uint32_t data = 0;
2824
2825         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2826                               AMD_PG_SUPPORT_GFX_SMG |
2827                               AMD_PG_SUPPORT_GFX_DMG)) {
2828                 /* init IDLE_POLL_COUNT = 60 */
2829                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2830                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2831                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2833
2834                 /* init RLC PG Delay */
2835                 data = 0;
2836                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2837                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2838                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2839                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2840                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2841
2842                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2843                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2844                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2845                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2846
2847                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2848                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2849                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2850                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2851
2852                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2853                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2854
2855                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2856                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2857                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2858                 if (adev->asic_type != CHIP_RENOIR)
2859                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2860         }
2861 }
2862
2863 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2864                                                 bool enable)
2865 {
2866         uint32_t data = 0;
2867         uint32_t default_data = 0;
2868
2869         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2870         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2871                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2872                              enable ? 1 : 0);
2873         if (default_data != data)
2874                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2875 }
2876
2877 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2878                                                 bool enable)
2879 {
2880         uint32_t data = 0;
2881         uint32_t default_data = 0;
2882
2883         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2884         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2885                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2886                              enable ? 1 : 0);
2887         if (default_data != data)
2888                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2889 }
2890
2891 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2892                                         bool enable)
2893 {
2894         uint32_t data = 0;
2895         uint32_t default_data = 0;
2896
2897         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2898         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2899                              CP_PG_DISABLE,
2900                              enable ? 0 : 1);
2901         if (default_data != data)
2902                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2903 }
2904
2905 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2906                                                 bool enable)
2907 {
2908         uint32_t data, default_data;
2909
2910         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2911         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2912                              GFX_POWER_GATING_ENABLE,
2913                              enable ? 1 : 0);
2914         if (default_data != data)
2915                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2916 }
2917
2918 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2919                                                 bool enable)
2920 {
2921         uint32_t data, default_data;
2922
2923         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2924         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2925                              GFX_PIPELINE_PG_ENABLE,
2926                              enable ? 1 : 0);
2927         if (default_data != data)
2928                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2929
2930         if (!enable)
2931                 /* read any GFX register to wake up GFX */
2932                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2933 }
2934
2935 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2936                                                        bool enable)
2937 {
2938         uint32_t data, default_data;
2939
2940         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2941         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2942                              STATIC_PER_CU_PG_ENABLE,
2943                              enable ? 1 : 0);
2944         if (default_data != data)
2945                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2946 }
2947
2948 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2949                                                 bool enable)
2950 {
2951         uint32_t data, default_data;
2952
2953         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2954         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2955                              DYN_PER_CU_PG_ENABLE,
2956                              enable ? 1 : 0);
2957         if (default_data != data)
2958                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2959 }
2960
2961 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2962 {
2963         gfx_v9_0_init_csb(adev);
2964
2965         /*
2966          * The RLC save/restore list is supported from rlc v2_1 onward
2967          * and is required by the gfxoff feature.
2968          */
2969         if (adev->gfx.rlc.is_rlc_v2_1) {
2970                 if (adev->asic_type == CHIP_VEGA12 ||
2971                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
2972                         gfx_v9_1_init_rlc_save_restore_list(adev);
2973                 gfx_v9_0_enable_save_restore_machine(adev);
2974         }
2975
2976         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2977                               AMD_PG_SUPPORT_GFX_SMG |
2978                               AMD_PG_SUPPORT_GFX_DMG |
2979                               AMD_PG_SUPPORT_CP |
2980                               AMD_PG_SUPPORT_GDS |
2981                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2982                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2983                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2984                 gfx_v9_0_init_gfx_power_gating(adev);
2985         }
2986 }
2987
2988 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2989 {
2990         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2991         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2992         gfx_v9_0_wait_for_rlc_serdes(adev);
2993 }
2994
2995 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2996 {
2997         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2998         udelay(50);
2999         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3000         udelay(50);
3001 }
3002
3003 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3004 {
3005 #ifdef AMDGPU_RLC_DEBUG_RETRY
3006         u32 rlc_ucode_ver;
3007 #endif
3008
3009         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3010         udelay(50);
3011
3012         /* APUs (e.g. carrizo) enable the cp interrupt after the cp is initialized */
3013         if (!(adev->flags & AMD_IS_APU)) {
3014                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3015                 udelay(50);
3016         }
3017
3018 #ifdef AMDGPU_RLC_DEBUG_RETRY
3019         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3020         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3021         if (rlc_ucode_ver == 0x108) {
3022                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3023                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3024                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3025                  * default is 0x9C4 to create a 100us interval */
3026                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3027                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3028                  * to disable the page fault retry interrupts, default is
3029                  * 0x100 (256) */
3030                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3031         }
3032 #endif
3033 }
3034
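     /*
      * Legacy (non-PSP) RLC microcode load: the ucode payload is
      * streamed dword-by-dword through the auto-incrementing
      * RLC_GPM_UCODE_ADDR/DATA pair starting at the fixed load address,
      * and the firmware version is left in the ADDR register afterwards.
      */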
3035 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3036 {
3037         const struct rlc_firmware_header_v2_0 *hdr;
3038         const __le32 *fw_data;
3039         unsigned i, fw_size;
3040
3041         if (!adev->gfx.rlc_fw)
3042                 return -EINVAL;
3043
3044         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3045         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3046
3047         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3048                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3049         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3050
3051         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3052                         RLCG_UCODE_LOADING_START_ADDRESS);
3053         for (i = 0; i < fw_size; i++)
3054                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3055         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3056
3057         return 0;
3058 }
3059
3060 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3061 {
3062         int r;
3063
3064         if (amdgpu_sriov_vf(adev)) {
3065                 gfx_v9_0_init_csb(adev);
3066                 return 0;
3067         }
3068
3069         adev->gfx.rlc.funcs->stop(adev);
3070
3071         /* disable CG */
3072         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3073
3074         gfx_v9_0_init_pg(adev);
3075
3076         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3077                 /* legacy rlc firmware loading */
3078                 r = gfx_v9_0_rlc_load_microcode(adev);
3079                 if (r)
3080                         return r;
3081         }
3082
3083         switch (adev->asic_type) {
3084         case CHIP_RAVEN:
3085                 if (amdgpu_lbpw == 0)
3086                         gfx_v9_0_enable_lbpw(adev, false);
3087                 else
3088                         gfx_v9_0_enable_lbpw(adev, true);
3089                 break;
3090         case CHIP_VEGA20:
3091                 if (amdgpu_lbpw > 0)
3092                         gfx_v9_0_enable_lbpw(adev, true);
3093                 else
3094                         gfx_v9_0_enable_lbpw(adev, false);
3095                 break;
3096         default:
3097                 break;
3098         }
3099
3100         adev->gfx.rlc.funcs->start(adev);
3101
3102         return 0;
3103 }
3104
3105 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3106 {
3107         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3108
3109         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3110         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3111         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3112         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3113         udelay(50);
3114 }
3115
3116 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3117 {
3118         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3119         const struct gfx_firmware_header_v1_0 *ce_hdr;
3120         const struct gfx_firmware_header_v1_0 *me_hdr;
3121         const __le32 *fw_data;
3122         unsigned i, fw_size;
3123
3124         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3125                 return -EINVAL;
3126
3127         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3128                 adev->gfx.pfp_fw->data;
3129         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3130                 adev->gfx.ce_fw->data;
3131         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3132                 adev->gfx.me_fw->data;
3133
3134         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3135         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3136         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3137
3138         gfx_v9_0_cp_gfx_enable(adev, false);
3139
3140         /* PFP */
3141         fw_data = (const __le32 *)
3142                 (adev->gfx.pfp_fw->data +
3143                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3144         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3145         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3146         for (i = 0; i < fw_size; i++)
3147                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3148         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3149
3150         /* CE */
3151         fw_data = (const __le32 *)
3152                 (adev->gfx.ce_fw->data +
3153                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3154         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3155         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3156         for (i = 0; i < fw_size; i++)
3157                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3158         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3159
3160         /* ME */
3161         fw_data = (const __le32 *)
3162                 (adev->gfx.me_fw->data +
3163                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3164         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3165         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3166         for (i = 0; i < fw_size; i++)
3167                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3168         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3169
3170         return 0;
3171 }
3172
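     /*
      * Brings up the gfx CP and plays the clear-state init sequence into
      * ring 0 as PM4 packets: PREAMBLE begin/end around the SECT_CONTEXT
      * register extents from gfx9_cs_data, a CLEAR_STATE packet, and the
      * CE partition bases.
      */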
3173 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3174 {
3175         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3176         const struct cs_section_def *sect = NULL;
3177         const struct cs_extent_def *ext = NULL;
3178         int r, i, tmp;
3179
3180         /* init the CP */
3181         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3182         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3183
3184         gfx_v9_0_cp_gfx_enable(adev, true);
3185
3186         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3187         if (r) {
3188                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3189                 return r;
3190         }
3191
3192         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3193         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3194
3195         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3196         amdgpu_ring_write(ring, 0x80000000);
3197         amdgpu_ring_write(ring, 0x80000000);
3198
3199         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3200                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3201                         if (sect->id == SECT_CONTEXT) {
3202                                 amdgpu_ring_write(ring,
3203                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3204                                                ext->reg_count));
3205                                 amdgpu_ring_write(ring,
3206                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3207                                 for (i = 0; i < ext->reg_count; i++)
3208                                         amdgpu_ring_write(ring, ext->extent[i]);
3209                         }
3210                 }
3211         }
3212
3213         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3214         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3215
3216         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3217         amdgpu_ring_write(ring, 0);
3218
3219         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3220         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3221         amdgpu_ring_write(ring, 0x8000);
3222         amdgpu_ring_write(ring, 0x8000);
3223
3224         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3225         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3226                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3227         amdgpu_ring_write(ring, tmp);
3228         amdgpu_ring_write(ring, 0);
3229
3230         amdgpu_ring_commit(ring);
3231
3232         return 0;
3233 }
3234
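     /*
      * Programs the gfx ring 0 hardware registers from the software ring
      * state: CP_RB0_CNTL's RB_BUFSZ field takes log2 of the ring size
      * in 8-byte units, the rptr writeback and wptr poll addresses point
      * into the shared writeback page, and the doorbell range is opened
      * before the ring is started.
      */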
3235 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3236 {
3237         struct amdgpu_ring *ring;
3238         u32 tmp;
3239         u32 rb_bufsz;
3240         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3241
3242         /* Set the write pointer delay */
3243         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3244
3245         /* set the RB to use vmid 0 */
3246         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3247
3248         /* Set ring buffer size */
3249         ring = &adev->gfx.gfx_ring[0];
3250         rb_bufsz = order_base_2(ring->ring_size / 8);
3251         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3252         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3253 #ifdef __BIG_ENDIAN
3254         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3255 #endif
3256         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3257
3258         /* Initialize the ring buffer's write pointers */
3259         ring->wptr = 0;
3260         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3261         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3262
3263         /* set the wb address whether it's enabled or not */
3264         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3265         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3266         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3267
3268         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3269         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3270         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3271
3272         mdelay(1);
3273         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3274
3275         rb_addr = ring->gpu_addr >> 8;
3276         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3277         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3278
3279         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3280         if (ring->use_doorbell) {
3281                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3282                                     DOORBELL_OFFSET, ring->doorbell_index);
3283                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3284                                     DOORBELL_EN, 1);
3285         } else {
3286                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3287         }
3288         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3289
3290         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3291                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3292         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3293
3294         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3295                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3296
3298         /* start the ring */
3299         gfx_v9_0_cp_gfx_start(adev);
3300         ring->sched.ready = true;
3301
3302         return 0;
3303 }
3304
3305 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3306 {
3307         if (enable) {
3308                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3309         } else {
3310                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3311                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3312                 adev->gfx.kiq.ring.sched.ready = false;
3313         }
3314         udelay(50);
3315 }
3316
3317 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3318 {
3319         const struct gfx_firmware_header_v1_0 *mec_hdr;
3320         const __le32 *fw_data;
3321         unsigned i;
3322         u32 tmp;
3323
3324         if (!adev->gfx.mec_fw)
3325                 return -EINVAL;
3326
3327         gfx_v9_0_cp_compute_enable(adev, false);
3328
3329         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3330         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3331
3332         fw_data = (const __le32 *)
3333                 (adev->gfx.mec_fw->data +
3334                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3335         tmp = 0;
3336         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3337         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3338         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3339
3340         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3341                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3342         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3343                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3344
3345         /* MEC1 */
3346         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3347                          mec_hdr->jt_offset);
3348         for (i = 0; i < mec_hdr->jt_size; i++)
3349                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3350                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3351
3352         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3353                         adev->gfx.mec_fw_version);
3354         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1 */
3355
3356         return 0;
3357 }
3358
3359 /* KIQ functions */
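     /*
      * The low byte of RLC_CP_SCHEDULERS tells the RLC which hardware
      * queue is the KIQ, encoded as (me << 5) | (pipe << 3) | queue.
      * The value is written once as-is and then again with bit 7 set,
      * which appears to latch the entry as valid.
      */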
3360 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3361 {
3362         uint32_t tmp;
3363         struct amdgpu_device *adev = ring->adev;
3364
3365         /* tell RLC which is KIQ queue */
3366         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3367         tmp &= 0xffffff00;
3368         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3369         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3370         tmp |= 0x80;
3371         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3372 }
3373
3374 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3375 {
3376         struct amdgpu_device *adev = ring->adev;
3377
3378         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3379                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3380                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3381                         mqd->cp_hqd_queue_priority =
3382                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3383                 }
3384         }
3385 }
3386
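     /*
      * Fills in the memory queue descriptor (MQD) for this ring. The MQD
      * mirrors the CP_HQD_* register state for one hardware queue; it is
      * either written to the registers directly (KIQ path below) or
      * handed to the CP via a KIQ map_queues packet, which is why only
      * the KIQ marks itself active here.
      */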
3387 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3388 {
3389         struct amdgpu_device *adev = ring->adev;
3390         struct v9_mqd *mqd = ring->mqd_ptr;
3391         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3392         uint32_t tmp;
3393
3394         mqd->header = 0xC0310800;
3395         mqd->compute_pipelinestat_enable = 0x00000001;
3396         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3397         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3398         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3399         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3400         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3401         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3402         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3403         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3404         mqd->compute_misc_reserved = 0x00000003;
3405
3406         mqd->dynamic_cu_mask_addr_lo =
3407                 lower_32_bits(ring->mqd_gpu_addr
3408                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3409         mqd->dynamic_cu_mask_addr_hi =
3410                 upper_32_bits(ring->mqd_gpu_addr
3411                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3412
3413         eop_base_addr = ring->eop_gpu_addr >> 8;
3414         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3415         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3416
3417         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3418         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3419         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3420                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3421
3422         mqd->cp_hqd_eop_control = tmp;
3423
3424         /* enable doorbell? */
3425         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3426
3427         if (ring->use_doorbell) {
3428                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3429                                     DOORBELL_OFFSET, ring->doorbell_index);
3430                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3431                                     DOORBELL_EN, 1);
3432                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3433                                     DOORBELL_SOURCE, 0);
3434                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3435                                     DOORBELL_HIT, 0);
3436         } else {
3437                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438                                          DOORBELL_EN, 0);
3439         }
3440
3441         mqd->cp_hqd_pq_doorbell_control = tmp;
3442
3443         /* disable the queue if it's active */
3444         ring->wptr = 0;
3445         mqd->cp_hqd_dequeue_request = 0;
3446         mqd->cp_hqd_pq_rptr = 0;
3447         mqd->cp_hqd_pq_wptr_lo = 0;
3448         mqd->cp_hqd_pq_wptr_hi = 0;
3449
3450         /* set the pointer to the MQD */
3451         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3452         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3453
3454         /* set MQD vmid to 0 */
3455         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3456         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3457         mqd->cp_mqd_control = tmp;
3458
3459         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3460         hqd_gpu_addr = ring->gpu_addr >> 8;
3461         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3462         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3463
3464         /* set up the HQD, this is similar to CP_RB0_CNTL */
3465         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3466         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3467                             (order_base_2(ring->ring_size / 4) - 1));
3468         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3469                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3470 #ifdef __BIG_ENDIAN
3471         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3472 #endif
3473         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3474         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3475         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3476         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3477         mqd->cp_hqd_pq_control = tmp;
3478
3479         /* set the wb address whether it's enabled or not */
3480         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3481         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3482         mqd->cp_hqd_pq_rptr_report_addr_hi =
3483                 upper_32_bits(wb_gpu_addr) & 0xffff;
3484
3485         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3486         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3487         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3488         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3489
3490         tmp = 0;
3491         /* enable the doorbell if requested */
3492         if (ring->use_doorbell) {
3493                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3494                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3495                                 DOORBELL_OFFSET, ring->doorbell_index);
3496
3497                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3498                                          DOORBELL_EN, 1);
3499                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3500                                          DOORBELL_SOURCE, 0);
3501                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502                                          DOORBELL_HIT, 0);
3503         }
3504
3505         mqd->cp_hqd_pq_doorbell_control = tmp;
3506
3507         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3508         ring->wptr = 0;
3509         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3510
3511         /* set the vmid for the queue */
3512         mqd->cp_hqd_vmid = 0;
3513
3514         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3515         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3516         mqd->cp_hqd_persistent_state = tmp;
3517
3518         /* set MIN_IB_AVAIL_SIZE */
3519         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3520         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3521         mqd->cp_hqd_ib_control = tmp;
3522
3523         /* set static priority for a queue/ring */
3524         gfx_v9_0_mqd_set_priority(ring, mqd);
3525         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3526
3527         /* the map_queues packet doesn't need to activate the queue,
3528          * so only the KIQ needs to set this field.
3529          */
3530         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3531                 mqd->cp_hqd_active = 1;
3532
3533         return 0;
3534 }
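
/*
 * A minimal sketch (illustrative only, guarded out of the build; the helper
 * name is hypothetical) of the pattern gfx_v9_0_mqd_init() uses throughout:
 * read the current register value, update a single field with
 * REG_SET_FIELD(), and cache the result in the MQD instead of writing it
 * back; gfx_v9_0_kiq_init_register() later writes the cached values into
 * the live registers.
 */
#if 0
static u32 gfx_v9_0_mqd_field_example(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);

	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	return tmp;	/* caller stores this in mqd->cp_hqd_ib_control */
}
#endif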
3535
3536 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3537 {
3538         struct amdgpu_device *adev = ring->adev;
3539         struct v9_mqd *mqd = ring->mqd_ptr;
3540         int j;
3541
3542         /* disable wptr polling */
3543         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3544
3545         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3546                mqd->cp_hqd_eop_base_addr_lo);
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3548                mqd->cp_hqd_eop_base_addr_hi);
3549
3550         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3551         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3552                mqd->cp_hqd_eop_control);
3553
3554         /* enable doorbell? */
3555         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3556                mqd->cp_hqd_pq_doorbell_control);
3557
3558         /* disable the queue if it's active */
3559         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3560                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3561                 for (j = 0; j < adev->usec_timeout; j++) {
3562                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3563                                 break;
3564                         udelay(1);
3565                 }
3566                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3567                        mqd->cp_hqd_dequeue_request);
3568                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3569                        mqd->cp_hqd_pq_rptr);
3570                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3571                        mqd->cp_hqd_pq_wptr_lo);
3572                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3573                        mqd->cp_hqd_pq_wptr_hi);
3574         }
3575
3576         /* set the pointer to the MQD */
3577         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3578                mqd->cp_mqd_base_addr_lo);
3579         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3580                mqd->cp_mqd_base_addr_hi);
3581
3582         /* set MQD vmid to 0 */
3583         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3584                mqd->cp_mqd_control);
3585
3586         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3587         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3588                mqd->cp_hqd_pq_base_lo);
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3590                mqd->cp_hqd_pq_base_hi);
3591
3592         /* set up the HQD, this is similar to CP_RB0_CNTL */
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3594                mqd->cp_hqd_pq_control);
3595
3596         /* set the wb address whether it's enabled or not */
3597         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3598                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3599         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3600                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3601
3602         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3603         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3604                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3605         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3606                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3607
3608         /* enable the doorbell if requested */
3609         if (ring->use_doorbell) {
3610                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3611                                         (adev->doorbell_index.kiq * 2) << 2);
3612                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3613                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3614         }
3615
3616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3617                mqd->cp_hqd_pq_doorbell_control);
3618
3619         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3620         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3621                mqd->cp_hqd_pq_wptr_lo);
3622         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3623                mqd->cp_hqd_pq_wptr_hi);
3624
3625         /* set the vmid for the queue */
3626         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3627
3628         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3629                mqd->cp_hqd_persistent_state);
3630
3631         /* activate the queue */
3632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3633                mqd->cp_hqd_active);
3634
3635         if (ring->use_doorbell)
3636                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3637
3638         return 0;
3639 }
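
/*
 * gfx_v9_0_kiq_init_register() above replays the values cached in the MQD
 * by gfx_v9_0_mqd_init() into the live HQD registers. It must run with the
 * target queue selected through soc15_grbm_select() (see the callers
 * below), since the CP_HQD_* registers are banked per queue.
 */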
3640
3641 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3642 {
3643         struct amdgpu_device *adev = ring->adev;
3644         int j;
3645
3646         /* disable the queue if it's active */
3647         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3648
3649                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3650
3651                 for (j = 0; j < adev->usec_timeout; j++) {
3652                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3653                                 break;
3654                         udelay(1);
3655                 }
3656
3657                 if (j == adev->usec_timeout) {
3658                         DRM_DEBUG("KIQ dequeue request failed.\n");
3659
3660                         /* Manual disable if dequeue request times out */
3661                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3662                 }
3663
3664                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3665                       0);
3666         }
3667
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3669         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3670         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3671         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3672         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3673         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3674         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3675         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3676
3677         return 0;
3678 }
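
/*
 * The dequeue path above polls CP_HQD_ACTIVE until the queue drains. A
 * minimal sketch of that polling idiom (illustrative only, guarded out of
 * the build; the helper name is hypothetical):
 */
#if 0
static int gfx_v9_0_hqd_wait_idle_example(struct amdgpu_device *adev)
{
	int j;

	for (j = 0; j < adev->usec_timeout; j++) {
		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
			return 0;	/* queue went idle */
		udelay(1);
	}
	return -ETIMEDOUT;	/* caller may force CP_HQD_ACTIVE to 0 */
}
#endif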
3679
3680 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3681 {
3682         struct amdgpu_device *adev = ring->adev;
3683         struct v9_mqd *mqd = ring->mqd_ptr;
3684         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3685
3686         gfx_v9_0_kiq_setting(ring);
3687
3688         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3689                 /* reset MQD to a clean status */
3690                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3691                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3692
3693                 /* reset ring buffer */
3694                 ring->wptr = 0;
3695                 amdgpu_ring_clear_ring(ring);
3696
3697                 mutex_lock(&adev->srbm_mutex);
3698                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3699                 gfx_v9_0_kiq_init_register(ring);
3700                 soc15_grbm_select(adev, 0, 0, 0, 0);
3701                 mutex_unlock(&adev->srbm_mutex);
3702         } else {
3703                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3704                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3705                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3706                 mutex_lock(&adev->srbm_mutex);
3707                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3708                 gfx_v9_0_mqd_init(ring);
3709                 gfx_v9_0_kiq_init_register(ring);
3710                 soc15_grbm_select(adev, 0, 0, 0, 0);
3711                 mutex_unlock(&adev->srbm_mutex);
3712
3713                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3714                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3715         }
3716
3717         return 0;
3718 }
3719
3720 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3721 {
3722         struct amdgpu_device *adev = ring->adev;
3723         struct v9_mqd *mqd = ring->mqd_ptr;
3724         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3725
3726         if (!adev->in_gpu_reset && !adev->in_suspend) {
3727                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3728                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3729                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3730                 mutex_lock(&adev->srbm_mutex);
3731                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3732                 gfx_v9_0_mqd_init(ring);
3733                 soc15_grbm_select(adev, 0, 0, 0, 0);
3734                 mutex_unlock(&adev->srbm_mutex);
3735
3736                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3737                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3738         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3739                 /* reset MQD to a clean status */
3740                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3741                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3742
3743                 /* reset ring buffer */
3744                 ring->wptr = 0;
3745                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3746                 amdgpu_ring_clear_ring(ring);
3747         } else {
3748                 amdgpu_ring_clear_ring(ring);
3749         }
3750
3751         return 0;
3752 }
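
/*
 * Three paths through gfx_v9_0_kcq_init_queue() above: first-time init
 * builds the MQD from scratch and snapshots it into mqd_backup; a GPU
 * reset restores that snapshot and clears the ring; plain resume only
 * clears the ring and reuses the MQD contents left in place.
 */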
3753
3754 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3755 {
3756         struct amdgpu_ring *ring;
3757         int r;
3758
3759         ring = &adev->gfx.kiq.ring;
3760
3761         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3762         if (unlikely(r != 0))
3763                 return r;
3764
3765         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3766         if (unlikely(r != 0))
3767                 return r;
3768
3769         gfx_v9_0_kiq_init_queue(ring);
3770         amdgpu_bo_kunmap(ring->mqd_obj);
3771         ring->mqd_ptr = NULL;
3772         amdgpu_bo_unreserve(ring->mqd_obj);
3773         ring->sched.ready = true;
3774         return 0;
3775 }
3776
3777 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3778 {
3779         struct amdgpu_ring *ring = NULL;
3780         int r = 0, i;
3781
3782         gfx_v9_0_cp_compute_enable(adev, true);
3783
3784         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3785                 ring = &adev->gfx.compute_ring[i];
3786
3787                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3788                 if (unlikely(r != 0))
3789                         goto done;
3790                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3791                 if (!r) {
3792                         r = gfx_v9_0_kcq_init_queue(ring);
3793                         amdgpu_bo_kunmap(ring->mqd_obj);
3794                         ring->mqd_ptr = NULL;
3795                 }
3796                 amdgpu_bo_unreserve(ring->mqd_obj);
3797                 if (r)
3798                         goto done;
3799         }
3800
3801         r = amdgpu_gfx_enable_kcq(adev);
3802 done:
3803         return r;
3804 }
3805
3806 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3807 {
3808         int r, i;
3809         struct amdgpu_ring *ring;
3810
3811         if (!(adev->flags & AMD_IS_APU))
3812                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3813
3814         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3815                 if (adev->asic_type != CHIP_ARCTURUS) {
3816                         /* legacy firmware loading */
3817                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3818                         if (r)
3819                                 return r;
3820                 }
3821
3822                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3823                 if (r)
3824                         return r;
3825         }
3826
3827         r = gfx_v9_0_kiq_resume(adev);
3828         if (r)
3829                 return r;
3830
3831         if (adev->asic_type != CHIP_ARCTURUS) {
3832                 r = gfx_v9_0_cp_gfx_resume(adev);
3833                 if (r)
3834                         return r;
3835         }
3836
3837         r = gfx_v9_0_kcq_resume(adev);
3838         if (r)
3839                 return r;
3840
3841         if (adev->asic_type != CHIP_ARCTURUS) {
3842                 ring = &adev->gfx.gfx_ring[0];
3843                 r = amdgpu_ring_test_helper(ring);
3844                 if (r)
3845                         return r;
3846         }
3847
3848         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3849                 ring = &adev->gfx.compute_ring[i];
3850                 amdgpu_ring_test_helper(ring);
3851         }
3852
3853         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3854
3855         return 0;
3856 }
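
/*
 * Ordering note for gfx_v9_0_cp_resume() above: the KIQ is brought up
 * first so it can service the MAP_QUEUES requests issued when the KCQs
 * are enabled; the GFX ring is resumed only on ASICs that have one
 * (Arcturus is compute-only, hence the CHIP_ARCTURUS checks).
 */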
3857
3858 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3859 {
3860         u32 tmp;
3861
3862         if (adev->asic_type != CHIP_ARCTURUS)
3863                 return;
3864
3865         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3866         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3867                                 adev->df.hash_status.hash_64k);
3868         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3869                                 adev->df.hash_status.hash_2m);
3870         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3871                                 adev->df.hash_status.hash_1g);
3872         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3873 }
3874
3875 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3876 {
3877         if (adev->asic_type != CHIP_ARCTURUS)
3878                 gfx_v9_0_cp_gfx_enable(adev, enable);
3879         gfx_v9_0_cp_compute_enable(adev, enable);
3880 }
3881
3882 static int gfx_v9_0_hw_init(void *handle)
3883 {
3884         int r;
3885         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3886
3887         if (!amdgpu_sriov_vf(adev))
3888                 gfx_v9_0_init_golden_registers(adev);
3889
3890         gfx_v9_0_constants_init(adev);
3891
3892         gfx_v9_0_init_tcp_config(adev);
3893
3894         r = adev->gfx.rlc.funcs->resume(adev);
3895         if (r)
3896                 return r;
3897
3898         r = gfx_v9_0_cp_resume(adev);
3899         if (r)
3900                 return r;
3901
3902         return r;
3903 }
3904
3905 static int gfx_v9_0_hw_fini(void *handle)
3906 {
3907         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3908
3909         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3910         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3911         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3912
3913         /* DF freeze and KCQ disable will fail if a RAS interrupt has triggered */
3914         if (!amdgpu_ras_intr_triggered())
3915                 /* disable KCQ so the CPC stops touching memory that is no longer valid */
3916                 amdgpu_gfx_disable_kcq(adev);
3917
3918         if (amdgpu_sriov_vf(adev)) {
3919                 gfx_v9_0_cp_gfx_enable(adev, false);
3920                 /* Polling must be disabled for SRIOV once hw teardown is done;
3921                  * otherwise the CPC engine may keep fetching a WB address that
3922                  * is invalid after sw teardown and trigger a DMAR read error
3923                  * on the hypervisor side.
3924                  */
3925                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3926                 return 0;
3927         }
3928
3929         /* Use the deinitialize sequence from CAIL when unbinding the device
3930          * from the driver, otherwise the KIQ hangs when binding it back.
3931          */
3932         if (!adev->in_gpu_reset && !adev->in_suspend) {
3933                 mutex_lock(&adev->srbm_mutex);
3934                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3935                                 adev->gfx.kiq.ring.pipe,
3936                                 adev->gfx.kiq.ring.queue, 0);
3937                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3938                 soc15_grbm_select(adev, 0, 0, 0, 0);
3939                 mutex_unlock(&adev->srbm_mutex);
3940         }
3941
3942         gfx_v9_0_cp_enable(adev, false);
3943         adev->gfx.rlc.funcs->stop(adev);
3944
3945         return 0;
3946 }
3947
3948 static int gfx_v9_0_suspend(void *handle)
3949 {
3950         return gfx_v9_0_hw_fini(handle);
3951 }
3952
3953 static int gfx_v9_0_resume(void *handle)
3954 {
3955         return gfx_v9_0_hw_init(handle);
3956 }
3957
3958 static bool gfx_v9_0_is_idle(void *handle)
3959 {
3960         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3961
3962         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3963                                 GRBM_STATUS, GUI_ACTIVE))
3964                 return false;
3965         else
3966                 return true;
3967 }
3968
3969 static int gfx_v9_0_wait_for_idle(void *handle)
3970 {
3971         unsigned i;
3972         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3973
3974         for (i = 0; i < adev->usec_timeout; i++) {
3975                 if (gfx_v9_0_is_idle(handle))
3976                         return 0;
3977                 udelay(1);
3978         }
3979         return -ETIMEDOUT;
3980 }
3981
3982 static int gfx_v9_0_soft_reset(void *handle)
3983 {
3984         u32 grbm_soft_reset = 0;
3985         u32 tmp;
3986         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3987
3988         /* GRBM_STATUS */
3989         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3990         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3991                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3992                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3993                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3994                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3995                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3996                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3997                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3998                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3999                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4000         }
4001
4002         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4003                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4004                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4005         }
4006
4007         /* GRBM_STATUS2 */
4008         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4009         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4010                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4011                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4012
4013
4014         if (grbm_soft_reset) {
4015                 /* stop the rlc */
4016                 adev->gfx.rlc.funcs->stop(adev);
4017
4018                 if (adev->asic_type != CHIP_ARCTURUS)
4019                         /* Disable GFX parsing/prefetching */
4020                         gfx_v9_0_cp_gfx_enable(adev, false);
4021
4022                 /* Disable MEC parsing/prefetching */
4023                 gfx_v9_0_cp_compute_enable(adev, false);
4024
4025                 /* assert the per-block reset bits */
4026                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4027                 tmp |= grbm_soft_reset;
4028                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4029                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4030                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4031
4032                 udelay(50);
4033
4034                 /* deassert the reset bits */
4035                 tmp &= ~grbm_soft_reset;
4036                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4037                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4038
4039                 /* Wait a little for things to settle down */
4040                 udelay(50);
4041         }
4042         return 0;
4043 }
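
/*
 * Note on the read-backs in gfx_v9_0_soft_reset() above: each write to
 * GRBM_SOFT_RESET is followed by a read of the same register, a common
 * posting/ordering idiom that makes sure the reset bits have actually
 * landed before the delays on either side of the deassert.
 */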
4044
4045 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4046 {
4047         signed long r, cnt = 0;
4048         unsigned long flags;
4049         uint32_t seq, reg_val_offs = 0;
4050         uint64_t value = 0;
4051         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4052         struct amdgpu_ring *ring = &kiq->ring;
4053
4054         BUG_ON(!ring->funcs->emit_rreg);
4055
4056         spin_lock_irqsave(&kiq->ring_lock, flags);
4057         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4058                 pr_err("critical bug! too many kiq readers\n");
4059                 goto failed_unlock;
4060         }
4061         amdgpu_ring_alloc(ring, 32);
4062         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4063         amdgpu_ring_write(ring, 9 |     /* src: register*/
4064                                 (5 << 8) |      /* dst: memory */
4065                                 (1 << 16) |     /* count sel */
4066                                 (1 << 20));     /* write confirm */
4067         amdgpu_ring_write(ring, 0);
4068         amdgpu_ring_write(ring, 0);
4069         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4070                                 reg_val_offs * 4));
4071         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4072                                 reg_val_offs * 4));
4073         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4074         if (r)
4075                 goto failed_undo;
4076
4077         amdgpu_ring_commit(ring);
4078         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4079
4080         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4081
4082         /* Don't keep waiting in the GPU-reset case, because that can
4083          * block the gpu_recover() routine forever: e.g. when this
4084          * virt_kiq_rreg path is triggered from TTM,
4085          * ttm_bo_lock_delayed_workqueue() never returns while we keep
4086          * waiting here, which hangs gpu_recover().
4087          *
4088          * Also don't keep waiting when called from IRQ context.
4089          */
4090         if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4091                 goto failed_kiq_read;
4092
4093         might_sleep();
4094         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4095                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4096                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4097         }
4098
4099         if (cnt > MAX_KIQ_REG_TRY)
4100                 goto failed_kiq_read;
4101
4102         mb();
4103         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4104                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4105         amdgpu_device_wb_free(adev, reg_val_offs);
4106         return value;
4107
4108 failed_undo:
4109         amdgpu_ring_undo(ring);
4110 failed_unlock:
4111         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4112 failed_kiq_read:
4113         if (reg_val_offs)
4114                 amdgpu_device_wb_free(adev, reg_val_offs);
4115         pr_err("failed to read gpu clock\n");
4116         return ~0;
4117 }
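
/*
 * The COPY_DATA control dword assembled in gfx_v9_0_kiq_read_clock() above,
 * spelled out field by field (illustrative only, guarded out of the build;
 * the labels restate the inline comments rather than official packet
 * definitions):
 */
#if 0
	u32 copy_data_ctrl = 9 |	 /* source select: GPU register */
			     (5 << 8) |	 /* destination select: memory */
			     (1 << 16) | /* count select: two dwords, hence
					  * the 64-bit readback from wb[offs]
					  * and wb[offs + 1] */
			     (1 << 20);	 /* write confirm */
#endif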
4118
4119 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4120 {
4121         uint64_t clock;
4122
4123         amdgpu_gfx_off_ctrl(adev, false);
4124         mutex_lock(&adev->gfx.gpu_clock_mutex);
4125         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4126                 clock = gfx_v9_0_kiq_read_clock(adev);
4127         } else {
4128                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4129                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4130                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4131         }
4132         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4133         amdgpu_gfx_off_ctrl(adev, true);
4134         return clock;
4135 }
4136
4137 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4138                                           uint32_t vmid,
4139                                           uint32_t gds_base, uint32_t gds_size,
4140                                           uint32_t gws_base, uint32_t gws_size,
4141                                           uint32_t oa_base, uint32_t oa_size)
4142 {
4143         struct amdgpu_device *adev = ring->adev;
4144
4145         /* GDS Base */
4146         gfx_v9_0_write_data_to_reg(ring, 0, false,
4147                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4148                                    gds_base);
4149
4150         /* GDS Size */
4151         gfx_v9_0_write_data_to_reg(ring, 0, false,
4152                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4153                                    gds_size);
4154
4155         /* GWS */
4156         gfx_v9_0_write_data_to_reg(ring, 0, false,
4157                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4158                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4159
4160         /* OA */
4161         gfx_v9_0_write_data_to_reg(ring, 0, false,
4162                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4163                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4164 }
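
/*
 * Worked example (illustrative) for the OA mask written above: with a
 * hypothetical oa_base = 4 and oa_size = 2,
 * (1 << (2 + 4)) - (1 << 4) = 0x40 - 0x10 = 0x30,
 * i.e. a contiguous mask over bits [oa_base, oa_base + oa_size).
 */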
4165
4166 static const u32 vgpr_init_compute_shader[] =
4167 {
4168         0xb07c0000, 0xbe8000ff,
4169         0x000000f8, 0xbf110800,
4170         0x7e000280, 0x7e020280,
4171         0x7e040280, 0x7e060280,
4172         0x7e080280, 0x7e0a0280,
4173         0x7e0c0280, 0x7e0e0280,
4174         0x80808800, 0xbe803200,
4175         0xbf84fff5, 0xbf9c0000,
4176         0xd28c0001, 0x0001007f,
4177         0xd28d0001, 0x0002027e,
4178         0x10020288, 0xb8810904,
4179         0xb7814000, 0xd1196a01,
4180         0x00000301, 0xbe800087,
4181         0xbefc00c1, 0xd89c4000,
4182         0x00020201, 0xd89cc080,
4183         0x00040401, 0x320202ff,
4184         0x00000800, 0x80808100,
4185         0xbf84fff8, 0x7e020280,
4186         0xbf810000, 0x00000000,
4187 };
4188
4189 static const u32 sgpr_init_compute_shader[] =
4190 {
4191         0xb07c0000, 0xbe8000ff,
4192         0x0000005f, 0xbee50080,
4193         0xbe812c65, 0xbe822c65,
4194         0xbe832c65, 0xbe842c65,
4195         0xbe852c65, 0xb77c0005,
4196         0x80808500, 0xbf84fff8,
4197         0xbe800080, 0xbf810000,
4198 };
4199
4200 static const u32 vgpr_init_compute_shader_arcturus[] = {
4201         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4202         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4203         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4204         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4205         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4206         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4207         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4208         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4209         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4210         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4211         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4212         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4213         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4214         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4215         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4216         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4217         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4218         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4219         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4220         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4221         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4222         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4223         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4224         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4225         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4226         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4227         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4228         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4229         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4230         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4231         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4232         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4233         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4234         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4235         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4236         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4237         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4238         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4239         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4240         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4241         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4242         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4243         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4244         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4245         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4246         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4247         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4248         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4249         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4250         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4251         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4252         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4253         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4254         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4255         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4256         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4257         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4258         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4259         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4260         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4261         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4262         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4263         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4264         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4265         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4266         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4267         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4268         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4269         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4270         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4271         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4272         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4273         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4274         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4275         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4276         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4277         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4278         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4279         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4280         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4281         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4282         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4283         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4284         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4285         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4286         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4287         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4288         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4289         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4290         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4291         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4292         0xbf84fff8, 0xbf810000,
4293 };
4294
4295 /* When the register arrays below are changed, please update gpr_reg_size
4296  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4297  * so that all gfx9 ASICs are covered. */
4298 static const struct soc15_reg_entry vgpr_init_regs[] = {
4299    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4300    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4301    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4302    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4303    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4304    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4305    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4306    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4307    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4308    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4310    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4311    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4312    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4313 };
4314
4315 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4316    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4320    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4321    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4322    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4323    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4324    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4325    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4327    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4328    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4329    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4330 };
4331
4332 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4333    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4334    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4335    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4336    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4337    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4338    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4339    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4340    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4341    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4342    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4343    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4344    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4345    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4346    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4347 };
4348
4349 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4350    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4351    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4352    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4353    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4354    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4364 };
4365
4366 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4367    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4368    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4369    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4370    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4371    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4372    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4373    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4374    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4375    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4376    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4377    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4378    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4379    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4380    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4381    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4382    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4383    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4384    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4385    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4386    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4387    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4388    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4389    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4390    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4391    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4392    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4393    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4394    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4395    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4396    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4397    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4398    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4399    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4400 };
4401
4402 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4403 {
4404         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4405         int i, r;
4406
4407         /* only supported when RAS is enabled */
4408         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4409                 return 0;
4410
4411         r = amdgpu_ring_alloc(ring, 7);
4412         if (r) {
4413                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4414                         ring->name, r);
4415                 return r;
4416         }
4417
4418         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4419         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4420
4421         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4422         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4423                                 PACKET3_DMA_DATA_DST_SEL(1) |
4424                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4425                                 PACKET3_DMA_DATA_ENGINE(0)));
4426         amdgpu_ring_write(ring, 0);
4427         amdgpu_ring_write(ring, 0);
4428         amdgpu_ring_write(ring, 0);
4429         amdgpu_ring_write(ring, 0);
4430         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4431                                 adev->gds.gds_size);
4432
4433         amdgpu_ring_commit(ring);
4434
4435         for (i = 0; i < adev->usec_timeout; i++) {
4436                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4437                         break;
4438                 udelay(1);
4439         }
4440
4441         if (i >= adev->usec_timeout)
4442                 r = -ETIMEDOUT;
4443
4444         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4445
4446         return r;
4447 }
4448
4449 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4450 {
4451         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4452         struct amdgpu_ib ib;
4453         struct dma_fence *f = NULL;
4454         int r, i;
4455         unsigned total_size, vgpr_offset, sgpr_offset;
4456         u64 gpu_addr;
4457
4458         int compute_dim_x = adev->gfx.config.max_shader_engines *
4459                                                 adev->gfx.config.max_cu_per_sh *
4460                                                 adev->gfx.config.max_sh_per_se;
4461         int sgpr_work_group_size = 5;
4462         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4463         int vgpr_init_shader_size;
4464         const u32 *vgpr_init_shader_ptr;
4465         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4466
4467         /* only supported when RAS is enabled */
4468         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4469                 return 0;
4470
4471         /* bail if the compute ring is not ready */
4472         if (!ring->sched.ready)
4473                 return 0;
4474
4475         if (adev->asic_type == CHIP_ARCTURUS) {
4476                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4477                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4478                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4479         } else {
4480                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4481                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4482                 vgpr_init_regs_ptr = vgpr_init_regs;
4483         }
4484
4485         total_size =
4486                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4487         total_size +=
4488                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4489         total_size +=
4490                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4491         total_size = ALIGN(total_size, 256);
4492         vgpr_offset = total_size;
4493         total_size += ALIGN(vgpr_init_shader_size, 256);
4494         sgpr_offset = total_size;
4495         total_size += sizeof(sgpr_init_compute_shader);
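        /*
         * Size breakdown (matches the IB construction below): each of the
         * three GPR passes costs gpr_reg_size SET_SH_REG writes of 3 dwords
         * each, plus 4 dwords for the COMPUTE_PGM_LO/HI write, 5 for
         * DISPATCH_DIRECT and 2 for the CS partial flush; the "* 4" turns
         * dwords into bytes, and the shaders are appended after alignment.
         */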
4496
4497         /* allocate an indirect buffer to put the commands in */
4498         memset(&ib, 0, sizeof(ib));
4499         r = amdgpu_ib_get(adev, NULL, total_size,
4500                                         AMDGPU_IB_POOL_DIRECT, &ib);
4501         if (r) {
4502                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4503                 return r;
4504         }
4505
4506         /* load the compute shaders */
4507         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4508                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4509
4510         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4511                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4512
4513         /* init the ib length to 0 */
4514         ib.length_dw = 0;
4515
4516         /* VGPR */
4517         /* write the register state for the compute dispatch */
4518         for (i = 0; i < gpr_reg_size; i++) {
4519                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4520                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4521                                                                 - PACKET3_SET_SH_REG_START;
4522                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4523         }
4524         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4525         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4526         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4527         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4528                                                         - PACKET3_SET_SH_REG_START;
4529         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4530         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4531
4532         /* write dispatch packet */
4533         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4534         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4535         ib.ptr[ib.length_dw++] = 1; /* y */
4536         ib.ptr[ib.length_dw++] = 1; /* z */
4537         ib.ptr[ib.length_dw++] =
4538                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4539
4540         /* write CS partial flush packet */
4541         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4542         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4543
4544         /* SGPR1 */
4545         /* write the register state for the compute dispatch */
4546         for (i = 0; i < gpr_reg_size; i++) {
4547                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4548                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4549                                                                 - PACKET3_SET_SH_REG_START;
4550                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4551         }
4552         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4553         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4554         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4555         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4556                                                         - PACKET3_SET_SH_REG_START;
4557         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4558         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4559
4560         /* write dispatch packet */
4561         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4562         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4563         ib.ptr[ib.length_dw++] = 1; /* y */
4564         ib.ptr[ib.length_dw++] = 1; /* z */
4565         ib.ptr[ib.length_dw++] =
4566                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4567
4568         /* write CS partial flush packet */
4569         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4570         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4571
4572         /* SGPR2 */
4573         /* write the register state for the compute dispatch */
4574         for (i = 0; i < gpr_reg_size; i++) {
4575                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4576                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4577                                                                 - PACKET3_SET_SH_REG_START;
4578                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4579         }
4580         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4581         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4582         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4583         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4584                                                         - PACKET3_SET_SH_REG_START;
4585         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4586         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4587
4588         /* write dispatch packet */
4589         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4590         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4591         ib.ptr[ib.length_dw++] = 1; /* y */
4592         ib.ptr[ib.length_dw++] = 1; /* z */
4593         ib.ptr[ib.length_dw++] =
4594                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4595
4596         /* write CS partial flush packet */
4597         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4598         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4599
4600         /* schedule the IB on the ring */
4601         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4602         if (r) {
4603                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4604                 goto fail;
4605         }
4606
4607         /* wait for the GPU to finish processing the IB */
4608         r = dma_fence_wait(f, false);
4609         if (r) {
4610                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4611                 goto fail;
4612         }
4613
4614 fail:
4615         amdgpu_ib_free(adev, &ib, NULL);
4616         dma_fence_put(f);
4617
4618         return r;
4619 }
4620
4621 static int gfx_v9_0_early_init(void *handle)
4622 {
4623         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4624
4625         if (adev->asic_type == CHIP_ARCTURUS)
4626                 adev->gfx.num_gfx_rings = 0;
4627         else
4628                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4629         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4630         gfx_v9_0_set_kiq_pm4_funcs(adev);
4631         gfx_v9_0_set_ring_funcs(adev);
4632         gfx_v9_0_set_irq_funcs(adev);
4633         gfx_v9_0_set_gds_init(adev);
4634         gfx_v9_0_set_rlc_funcs(adev);
4635
4636         return 0;
4637 }
4638
4639 static int gfx_v9_0_ecc_late_init(void *handle)
4640 {
4641         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4642         int r;
4643
4644         /*
4645          * Temporary workaround for an issue where CP firmware fails to
4646          * update the read pointer while CPDMA writes a clearing operation
4647          * to GDS during the suspend/resume sequence on several cards. So
4648          * just limit this operation to the cold boot sequence.
4649          */
4650         if (!adev->in_suspend) {
4651                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4652                 if (r)
4653                         return r;
4654         }
4655
4656         /* requires IBs so do in late init after IB pool is initialized */
4657         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4658         if (r)
4659                 return r;
4660
4661         if (adev->gfx.funcs &&
4662             adev->gfx.funcs->reset_ras_error_count)
4663                 adev->gfx.funcs->reset_ras_error_count(adev);
4664
4665         r = amdgpu_gfx_ras_late_init(adev);
4666         if (r)
4667                 return r;
4668
4669         return 0;
4670 }
4671
4672 static int gfx_v9_0_late_init(void *handle)
4673 {
4674         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4675         int r;
4676
4677         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4678         if (r)
4679                 return r;
4680
4681         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4682         if (r)
4683                 return r;
4684
4685         r = gfx_v9_0_ecc_late_init(handle);
4686         if (r)
4687                 return r;
4688
4689         return 0;
4690 }
4691
4692 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4693 {
4694         uint32_t rlc_setting;
4695
4696         /* report whether the RLC F32 core is currently enabled */
4697         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4698         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4699                 return false;
4700
4701         return true;
4702 }
4703
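/*
 * RLC safe-mode handshake: write the request (CMD plus MESSAGE = 1) to
 * mmRLC_SAFE_MODE, then poll until the RLC firmware acknowledges by
 * clearing the CMD bit, giving up after adev->usec_timeout iterations
 * of 1us each.  A timeout is not treated as fatal; the caller simply
 * proceeds.
 */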
4704 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4705 {
4706         uint32_t data;
4707         unsigned i;
4708
4709         data = RLC_SAFE_MODE__CMD_MASK;
4710         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4711         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4712
4713         /* wait for RLC_SAFE_MODE */
4714         for (i = 0; i < adev->usec_timeout; i++) {
4715                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4716                         break;
4717                 udelay(1);
4718         }
4719 }
4720
4721 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4722 {
4723         uint32_t data;
4724
4725         data = RLC_SAFE_MODE__CMD_MASK;
4726         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4727 }
4728
4729 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4730                                                 bool enable)
4731 {
4732         amdgpu_gfx_rlc_enter_safe_mode(adev);
4733
4734         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4735                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4736                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4737                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4738         } else {
4739                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4740                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4741                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4742         }
4743
4744         amdgpu_gfx_rlc_exit_safe_mode(adev);
4745 }
4746
4747 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4748                                                 bool enable)
4749 {
4750         /* TODO: double check if we need to perform under safe mode */
4751         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4752
4753         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4754                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4755         else
4756                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4757
4758         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4759                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4760         else
4761                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4762
4763         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4764 }
4765
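/*
 * The clock-gating updates below share one read-modify-write idiom:
 * snapshot the register, flip only the intended fields, and skip the
 * write entirely when nothing changed.  A minimal sketch of the pattern
 * (mmSOME_CG_REG and SOME_FIELD_MASK are placeholders, not real
 * registers):
 *
 *	def = data = RREG32_SOC15(GC, 0, mmSOME_CG_REG);
 *	data &= ~SOME_FIELD_MASK;	(or |= to assert an override)
 *	if (def != data)		(avoid a redundant register write)
 *		WREG32_SOC15(GC, 0, mmSOME_CG_REG, data);
 *
 * Everything runs under RLC safe mode so the driver does not race the
 * RLC firmware on these registers.
 */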
4766 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4767                                                       bool enable)
4768 {
4769         uint32_t data, def;
4770
4771         amdgpu_gfx_rlc_enter_safe_mode(adev);
4772
4773         /* It is disabled by HW by default */
4774         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4775                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4776                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4777
4778                 if (adev->asic_type != CHIP_VEGA12)
4779                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4780
4781                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4782                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4783                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4784
4785                 /* only for Vega10 & Raven1 */
4786                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4787
4788                 if (def != data)
4789                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4790
4791                 /* MGLS is a global flag to control all MGLS in GFX */
4792                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4793                         /* 2 - RLC memory Light sleep */
4794                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4795                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4796                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4797                                 if (def != data)
4798                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4799                         }
4800                         /* 3 - CP memory Light sleep */
4801                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4802                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4803                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4804                                 if (def != data)
4805                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4806                         }
4807                 }
4808         } else {
4809                 /* 1 - MGCG_OVERRIDE */
4810                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4811
4812                 if (adev->asic_type != CHIP_VEGA12)
4813                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4814
4815                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4816                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4817                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4818                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4819
4820                 if (def != data)
4821                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4822
4823                 /* 2 - disable MGLS in RLC */
4824                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4825                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4826                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4827                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4828                 }
4829
4830                 /* 3 - disable MGLS in CP */
4831                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4832                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4833                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4834                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4835                 }
4836         }
4837
4838         amdgpu_gfx_rlc_exit_safe_mode(adev);
4839 }
4840
4841 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4842                                            bool enable)
4843 {
4844         uint32_t data, def;
4845
4846         if (adev->asic_type == CHIP_ARCTURUS)
4847                 return;
4848
4849         amdgpu_gfx_rlc_enter_safe_mode(adev);
4850
4851         /* Enable 3D CGCG/CGLS */
4852         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4853                 /* write cmd to clear cgcg/cgls ov */
4854                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4855                 /* unset CGCG override */
4856                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4857                 /* update CGCG and CGLS override bits */
4858                 if (def != data)
4859                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4860
4861                 /* enable the 3D CGCG FSM (0x0000363f) */
4862                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4863
4864                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4865                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4866                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4867                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4868                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4869                 if (def != data)
4870                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4871
4872                 /* set IDLE_POLL_COUNT(0x00900100) */
4873                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4874                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4875                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4876                 if (def != data)
4877                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4878         } else {
4879                 /* Disable CGCG/CGLS */
4880                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4881                 /* disable cgcg, cgls should be disabled */
4882                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4883                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4884                 /* disable cgcg and cgls in FSM */
4885                 if (def != data)
4886                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4887         }
4888
4889         amdgpu_gfx_rlc_exit_safe_mode(adev);
4890 }
4891
4892 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4893                                                       bool enable)
4894 {
4895         uint32_t def, data;
4896
4897         amdgpu_gfx_rlc_enter_safe_mode(adev);
4898
4899         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4900                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4901                 /* unset CGCG override */
4902                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4903                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4904                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4905                 else
4906                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4907                 /* update CGCG and CGLS override bits */
4908                 if (def != data)
4909                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4910
4911                 /* enable the CGCG FSM (0x0000363F) */
4912                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4913
4914                 if (adev->asic_type == CHIP_ARCTURUS)
4915                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4916                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4917                 else
4918                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4919                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4920                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4921                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4922                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4923                 if (def != data)
4924                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4925
4926                 /* set IDLE_POLL_COUNT(0x00900100) */
4927                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4928                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4929                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4930                 if (def != data)
4931                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4932         } else {
4933                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4934                 /* reset CGCG/CGLS bits */
4935                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4936                 /* disable cgcg and cgls in FSM */
4937                 if (def != data)
4938                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4939         }
4940
4941         amdgpu_gfx_rlc_exit_safe_mode(adev);
4942 }
4943
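/*
 * Gating order matters: when enabling, medium-grain (MGCG/MGLS) must be
 * up before coarse-grain (CGCG/CGLS); when disabling, tear them down in
 * the reverse order.  The 3D CGCG/CGLS block sits between the two in
 * both directions.
 */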
4944 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4945                                             bool enable)
4946 {
4947         if (enable) {
4948                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4949                  * ===  MGCG + MGLS ===
4950                  */
4951                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4952                 /* ===  CGCG /CGLS for GFX 3D Only === */
4953                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4954                 /* ===  CGCG + CGLS === */
4955                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4956         } else {
4957                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4958                  * ===  CGCG + CGLS ===
4959                  */
4960                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4961                 /* ===  CGCG /CGLS for GFX 3D Only === */
4962                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4963                 /* ===  MGCG + MGLS === */
4964                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4965         }
4966         return 0;
4967 }
4968
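/*
 * Select which VMID the streaming performance monitor (SPM) traffic is
 * accounted to.  In SR-IOV one-VF powerplay mode the register is
 * accessed through the NO_KIQ variants, bypassing the KIQ path.
 */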
4969 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4970 {
4971         u32 reg, data;
4972
4973         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4974         if (amdgpu_sriov_is_pp_one_vf(adev))
4975                 data = RREG32_NO_KIQ(reg);
4976         else
4977                 data = RREG32(reg);
4978
4979         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4980         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4981
4982         if (amdgpu_sriov_is_pp_one_vf(adev))
4983                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4984         else
4985                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4986 }
4987
4988 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4989                                         uint32_t offset,
4990                                         struct soc15_reg_rlcg *entries, int arr_size)
4991 {
4992         int i;
4993         uint32_t reg;
4994
4995         if (!entries)
4996                 return false;
4997
4998         for (i = 0; i < arr_size; i++) {
4999                 const struct soc15_reg_rlcg *entry;
5000
5001                 entry = &entries[i];
5002                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5003                 if (offset == reg)
5004                         return true;
5005         }
5006
5007         return false;
5008 }
5009
5010 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5011 {
5012         return gfx_v9_0_check_rlcg_range(adev, offset,
5013                                         (void *)rlcg_access_gc_9_0,
5014                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5015 }
5016
5017 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5018         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5019         .set_safe_mode = gfx_v9_0_set_safe_mode,
5020         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5021         .init = gfx_v9_0_rlc_init,
5022         .get_csb_size = gfx_v9_0_get_csb_size,
5023         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5024         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5025         .resume = gfx_v9_0_rlc_resume,
5026         .stop = gfx_v9_0_rlc_stop,
5027         .reset = gfx_v9_0_rlc_reset,
5028         .start = gfx_v9_0_rlc_start,
5029         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5030         .rlcg_wreg = gfx_v9_0_rlcg_wreg,
5031         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5032 };
5033
5034 static int gfx_v9_0_set_powergating_state(void *handle,
5035                                           enum amd_powergating_state state)
5036 {
5037         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5038         bool enable = (state == AMD_PG_STATE_GATE);
5039
5040         switch (adev->asic_type) {
5041         case CHIP_RAVEN:
5042         case CHIP_RENOIR:
5043                 if (!enable)
5044                         amdgpu_gfx_off_ctrl(adev, false);
5045
5046                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5047                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5048                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5049                 } else {
5050                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5051                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5052                 }
5053
5054                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5055                         gfx_v9_0_enable_cp_power_gating(adev, true);
5056                 else
5057                         gfx_v9_0_enable_cp_power_gating(adev, false);
5058
5059                 /* update gfx cgpg state */
5060                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5061
5062                 /* update mgcg state */
5063                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5064
5065                 if (enable)
5066                         amdgpu_gfx_off_ctrl(adev, true);
5067                 break;
5068         case CHIP_VEGA12:
5069                 amdgpu_gfx_off_ctrl(adev, enable);
5070                 break;
5071         default:
5072                 break;
5073         }
5074
5075         return 0;
5076 }
5077
5078 static int gfx_v9_0_set_clockgating_state(void *handle,
5079                                           enum amd_clockgating_state state)
5080 {
5081         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5082
5083         if (amdgpu_sriov_vf(adev))
5084                 return 0;
5085
5086         switch (adev->asic_type) {
5087         case CHIP_VEGA10:
5088         case CHIP_VEGA12:
5089         case CHIP_VEGA20:
5090         case CHIP_RAVEN:
5091         case CHIP_ARCTURUS:
5092         case CHIP_RENOIR:
5093                 gfx_v9_0_update_gfx_clock_gating(adev,
5094                                                  state == AMD_CG_STATE_GATE);
5095                 break;
5096         default:
5097                 break;
5098         }
5099         return 0;
5100 }
5101
5102 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5103 {
5104         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5105         int data;
5106
5107         if (amdgpu_sriov_vf(adev))
5108                 *flags = 0;
5109
5110         /* AMD_CG_SUPPORT_GFX_MGCG */
5111         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5112         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5113                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5114
5115         /* AMD_CG_SUPPORT_GFX_CGCG */
5116         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5117         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5118                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5119
5120         /* AMD_CG_SUPPORT_GFX_CGLS */
5121         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5122                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5123
5124         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5125         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5126         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5127                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5128
5129         /* AMD_CG_SUPPORT_GFX_CP_LS */
5130         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5131         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5132                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5133
5134         if (adev->asic_type != CHIP_ARCTURUS) {
5135                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5136                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5137                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5138                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5139
5140                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5141                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5142                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5143         }
5144 }
5145
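/*
 * Ring-pointer plumbing for the GFX ring: the read pointer always comes
 * from the writeback slot the CP updates, while the write pointer goes
 * through the doorbell (plus a writeback shadow) when doorbells are in
 * use and falls back to the CP_RB0_WPTR/_HI registers otherwise.
 */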
5146 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5147 {
5148         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5149 }
5150
5151 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5152 {
5153         struct amdgpu_device *adev = ring->adev;
5154         u64 wptr;
5155
5156         /* XXX check if swapping is necessary on BE */
5157         if (ring->use_doorbell) {
5158                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5159         } else {
5160                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5161                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5162         }
5163
5164         return wptr;
5165 }
5166
5167 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5168 {
5169         struct amdgpu_device *adev = ring->adev;
5170
5171         if (ring->use_doorbell) {
5172                 /* XXX check if swapping is necessary on BE */
5173                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5174                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5175         } else {
5176                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5177                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5178         }
5179 }
5180
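/*
 * Emit an HDP flush: request the flush through the NBIO register pair
 * and wait on the matching done bit.  The ref/mask is per client:
 * compute rings use the CP2/CP6 mask (MEC1/MEC2) shifted by pipe, the
 * GFX ring uses CP0 and waits on the PFP engine.
 */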
5181 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5182 {
5183         struct amdgpu_device *adev = ring->adev;
5184         u32 ref_and_mask, reg_mem_engine;
5185         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5186
5187         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5188                 switch (ring->me) {
5189                 case 1:
5190                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5191                         break;
5192                 case 2:
5193                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5194                         break;
5195                 default:
5196                         return;
5197                 }
5198                 reg_mem_engine = 0;
5199         } else {
5200                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5201                 reg_mem_engine = 1; /* pfp */
5202         }
5203
5204         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5205                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5206                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5207                               ref_and_mask, ref_and_mask, 0x20);
5208 }
5209
5210 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5211                                         struct amdgpu_job *job,
5212                                         struct amdgpu_ib *ib,
5213                                         uint32_t flags)
5214 {
5215         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5216         u32 header, control = 0;
5217
5218         if (ib->flags & AMDGPU_IB_FLAG_CE)
5219                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5220         else
5221                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5222
5223         control |= ib->length_dw | (vmid << 24);
5224
5225         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5226                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5227
5228                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5229                         gfx_v9_0_ring_emit_de_meta(ring);
5230         }
5231
5232         amdgpu_ring_write(ring, header);
5233         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5234         amdgpu_ring_write(ring,
5235 #ifdef __BIG_ENDIAN
5236                 (2 << 0) |
5237 #endif
5238                 lower_32_bits(ib->gpu_addr));
5239         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5240         amdgpu_ring_write(ring, control);
5241 }
5242
5243 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5244                                           struct amdgpu_job *job,
5245                                           struct amdgpu_ib *ib,
5246                                           uint32_t flags)
5247 {
5248         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5249         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5250
5251         /* Currently, there is a high possibility to get wave ID mismatch
5252          * between ME and GDS, leading to a hw deadlock, because ME generates
5253          * different wave IDs than the GDS expects. This situation happens
5254          * randomly when at least 5 compute pipes use GDS ordered append.
5255          * The wave IDs generated by ME are also wrong after suspend/resume.
5256          * Those are probably bugs somewhere else in the kernel driver.
5257          *
5258          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5259          * GDS to 0 for this ring (me/pipe).
5260          */
5261         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5262                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5263                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5264                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5265         }
5266
5267         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5268         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5269         amdgpu_ring_write(ring,
5270 #ifdef __BIG_ENDIAN
5271                                 (2 << 0) |
5272 #endif
5273                                 lower_32_bits(ib->gpu_addr));
5274         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5275         amdgpu_ring_write(ring, control);
5276 }
5277
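/*
 * Fences are emitted as a RELEASE_MEM packet: flush or write back the
 * relevant caches at end-of-pipe, write the sequence number (32 or 64
 * bits, per DATA_SEL) to the fence address, and optionally raise an
 * interrupt (INT_SEL).
 */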
5278 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5279                                      u64 seq, unsigned flags)
5280 {
5281         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5282         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5283         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5284
5285         /* RELEASE_MEM - flush caches, send int */
5286         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5287         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5288                                                EOP_TC_NC_ACTION_EN) :
5289                                               (EOP_TCL1_ACTION_EN |
5290                                                EOP_TC_ACTION_EN |
5291                                                EOP_TC_WB_ACTION_EN |
5292                                                EOP_TC_MD_ACTION_EN)) |
5293                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5294                                  EVENT_INDEX(5)));
5295         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5296
5297         /*
5298          * The address must be Qword-aligned for a 64-bit write, and Dword-
5299          * aligned when only the low 32 bits are sent (high bits discarded).
5300          */
5301         if (write64bit)
5302                 BUG_ON(addr & 0x7);
5303         else
5304                 BUG_ON(addr & 0x3);
5305         amdgpu_ring_write(ring, lower_32_bits(addr));
5306         amdgpu_ring_write(ring, upper_32_bits(addr));
5307         amdgpu_ring_write(ring, lower_32_bits(seq));
5308         amdgpu_ring_write(ring, upper_32_bits(seq));
5309         amdgpu_ring_write(ring, 0);
5310 }
5311
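/*
 * Pipeline sync: stall the ring (PFP for GFX, ME for compute) with a
 * WAIT_REG_MEM on the fence writeback address until the most recently
 * synced sequence number appears there.
 */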
5312 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5313 {
5314         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5315         uint32_t seq = ring->fence_drv.sync_seq;
5316         uint64_t addr = ring->fence_drv.gpu_addr;
5317
5318         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5319                               lower_32_bits(addr), upper_32_bits(addr),
5320                               seq, 0xffffffff, 4);
5321 }
5322
5323 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5324                                         unsigned vmid, uint64_t pd_addr)
5325 {
5326         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5327
5328         /* compute doesn't have PFP */
5329         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5330                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5331                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5332                 amdgpu_ring_write(ring, 0x0);
5333         }
5334 }
5335
5336 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5337 {
5338         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5339 }
5340
5341 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5342 {
5343         u64 wptr;
5344
5345         /* XXX check if swapping is necessary on BE */
5346         if (ring->use_doorbell)
5347                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5348         else
5349                 BUG();
5350         return wptr;
5351 }
5352
5353 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5354 {
5355         struct amdgpu_device *adev = ring->adev;
5356
5357         /* XXX check if swapping is necessary on BE */
5358         if (ring->use_doorbell) {
5359                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5360                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5361         } else {
5362                 BUG(); /* only DOORBELL method supported on gfx9 now */
5363         }
5364 }
5365
5366 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5367                                          u64 seq, unsigned int flags)
5368 {
5369         struct amdgpu_device *adev = ring->adev;
5370
5371         /* we only allocate 32 bits for each seq wb address */
5372         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5373
5374         /* write fence seq to the "addr" */
5375         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5376         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5377                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5378         amdgpu_ring_write(ring, lower_32_bits(addr));
5379         amdgpu_ring_write(ring, upper_32_bits(addr));
5380         amdgpu_ring_write(ring, lower_32_bits(seq));
5381
5382         if (flags & AMDGPU_FENCE_FLAG_INT) {
5383                 /* set register to trigger INT */
5384                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5385                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5386                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5387                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5388                 amdgpu_ring_write(ring, 0);
5389                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5390         }
5391 }
5392
5393 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5394 {
5395         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5396         amdgpu_ring_write(ring, 0);
5397 }
5398
5399 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5400 {
5401         struct v9_ce_ib_state ce_payload = {0};
5402         uint64_t csa_addr;
5403         int cnt;
5404
5405         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5406         csa_addr = amdgpu_csa_vaddr(ring->adev);
5407
5408         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5409         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5410                                  WRITE_DATA_DST_SEL(8) |
5411                                  WR_CONFIRM) |
5412                                  WRITE_DATA_CACHE_POLICY(0));
5413         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5414         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5415         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5416 }
5417
5418 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5419 {
5420         struct v9_de_ib_state de_payload = {0};
5421         uint64_t csa_addr, gds_addr;
5422         int cnt;
5423
5424         csa_addr = amdgpu_csa_vaddr(ring->adev);
5425         gds_addr = csa_addr + 4096;
5426         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5427         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5428
5429         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5430         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5431         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5432                                  WRITE_DATA_DST_SEL(8) |
5433                                  WR_CONFIRM) |
5434                                  WRITE_DATA_CACHE_POLICY(0));
5435         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5436         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5437         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5438 }
5439
5440 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5441                                    bool secure)
5442 {
5443         uint32_t v = secure ? FRAME_TMZ : 0;
5444
5445         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5446         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5447 }
5448
5449 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5450 {
5451         uint32_t dw2 = 0;
5452
5453         if (amdgpu_sriov_vf(ring->adev))
5454                 gfx_v9_0_ring_emit_ce_meta(ring);
5455
5456         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5457         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5458                 /* set load_global_config & load_global_uconfig */
5459                 dw2 |= 0x8001;
5460                 /* set load_cs_sh_regs */
5461                 dw2 |= 0x01000000;
5462                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5463                 dw2 |= 0x10002;
5464
5465                 /* set load_ce_ram if a preamble is present */
5466                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5467                         dw2 |= 0x10000000;
5468         } else {
5469                 /* still load_ce_ram the first time a preamble is presented,
5470                  * even though no context switch happens.
5471                  */
5472                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5473                         dw2 |= 0x10000000;
5474         }
5475
5476         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5477         amdgpu_ring_write(ring, dw2);
5478         amdgpu_ring_write(ring, 0);
5479 }
5480
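/*
 * Conditional-execution pair: init_cond_exec emits a COND_EXEC packet
 * whose DW-count operand is a dummy (0x55aa55aa) and returns its ring
 * offset; once the caller knows how many DWs follow, patch_cond_exec
 * rewrites that slot with the real count, wrapping around the ring
 * buffer when needed.  A caller sketch, via the generic ring-funcs
 * wrappers in the common amdgpu code (illustrative, not defined here):
 *
 *	unsigned int offs = amdgpu_ring_init_cond_exec(ring);
 *	... emit the conditionally executed packets ...
 *	amdgpu_ring_patch_cond_exec(ring, offs);
 */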
5481 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5482 {
5483         unsigned ret;
5484         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5485         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5486         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5487         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5488         ret = ring->wptr & ring->buf_mask;
5489         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5490         return ret;
5491 }
5492
5493 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5494 {
5495         unsigned cur;
5496         BUG_ON(offset > ring->buf_mask);
5497         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5498
5499         cur = (ring->wptr & ring->buf_mask) - 1;
5500         if (likely(cur > offset))
5501                 ring->ring[offset] = cur - offset;
5502         else
5503                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5504 }
5505
5506 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5507                                     uint32_t reg_val_offs)
5508 {
5509         struct amdgpu_device *adev = ring->adev;
5510
5511         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5512         amdgpu_ring_write(ring, 0 |     /* src: register*/
5513                                 (5 << 8) |      /* dst: memory */
5514                                 (1 << 20));     /* write confirm */
5515         amdgpu_ring_write(ring, reg);
5516         amdgpu_ring_write(ring, 0);
5517         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5518                                 reg_val_offs * 4));
5519         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5520                                 reg_val_offs * 4));
5521 }
5522
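/*
 * Register write via a WRITE_DATA packet.  The command encoding differs
 * per ring type: GFX selects the ME engine with write-confirm, the KIQ
 * uses the no-address-increment encoding, and everything else falls
 * back to a plain confirmed write.
 */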
5523 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5524                                     uint32_t val)
5525 {
5526         uint32_t cmd = 0;
5527
5528         switch (ring->funcs->type) {
5529         case AMDGPU_RING_TYPE_GFX:
5530                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5531                 break;
5532         case AMDGPU_RING_TYPE_KIQ:
5533                 cmd = (1 << 16); /* no inc addr */
5534                 break;
5535         default:
5536                 cmd = WR_CONFIRM;
5537                 break;
5538         }
5539         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5540         amdgpu_ring_write(ring, cmd);
5541         amdgpu_ring_write(ring, reg);
5542         amdgpu_ring_write(ring, 0);
5543         amdgpu_ring_write(ring, val);
5544 }
5545
5546 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5547                                         uint32_t val, uint32_t mask)
5548 {
5549         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5550 }
5551
5552 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5553                                                   uint32_t reg0, uint32_t reg1,
5554                                                   uint32_t ref, uint32_t mask)
5555 {
5556         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5557         struct amdgpu_device *adev = ring->adev;
5558         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5559                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5560
5561         if (fw_version_ok)
5562                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5563                                       ref, mask, 0x20);
5564         else
5565                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5566                                                            ref, mask);
5567 }
5568
5569 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5570 {
5571         struct amdgpu_device *adev = ring->adev;
5572         uint32_t value = 0;
5573
5574         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5575         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5576         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5577         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5578         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5579 }
5580
5581 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5582                                                  enum amdgpu_interrupt_state state)
5583 {
5584         switch (state) {
5585         case AMDGPU_IRQ_STATE_DISABLE:
5586         case AMDGPU_IRQ_STATE_ENABLE:
5587                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5588                                TIME_STAMP_INT_ENABLE,
5589                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5590                 break;
5591         default:
5592                 break;
5593         }
5594 }
5595
5596 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5597                                                      int me, int pipe,
5598                                                      enum amdgpu_interrupt_state state)
5599 {
5600         u32 mec_int_cntl, mec_int_cntl_reg;
5601
5602         /*
5603          * amdgpu controls only the first MEC. That's why this function only
5604          * handles the setting of interrupts for this specific MEC. All other
5605          * pipes' interrupts are set by amdkfd.
5606          */
5607
5608         if (me == 1) {
5609                 switch (pipe) {
5610                 case 0:
5611                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5612                         break;
5613                 case 1:
5614                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5615                         break;
5616                 case 2:
5617                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5618                         break;
5619                 case 3:
5620                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5621                         break;
5622                 default:
5623                         DRM_DEBUG("invalid pipe %d\n", pipe);
5624                         return;
5625                 }
5626         } else {
5627                 DRM_DEBUG("invalid me %d\n", me);
5628                 return;
5629         }
5630
5631         switch (state) {
5632         case AMDGPU_IRQ_STATE_DISABLE:
5633                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5634                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5635                                              TIME_STAMP_INT_ENABLE, 0);
5636                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5637                 break;
5638         case AMDGPU_IRQ_STATE_ENABLE:
5639                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5640                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5641                                              TIME_STAMP_INT_ENABLE, 1);
5642                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5643                 break;
5644         default:
5645                 break;
5646         }
5647 }
5648
5649 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5650                                              struct amdgpu_irq_src *source,
5651                                              unsigned type,
5652                                              enum amdgpu_interrupt_state state)
5653 {
5654         switch (state) {
5655         case AMDGPU_IRQ_STATE_DISABLE:
5656         case AMDGPU_IRQ_STATE_ENABLE:
5657                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5658                                PRIV_REG_INT_ENABLE,
5659                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5660                 break;
5661         default:
5662                 break;
5663         }
5664
5665         return 0;
5666 }
5667
5668 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5669                                               struct amdgpu_irq_src *source,
5670                                               unsigned type,
5671                                               enum amdgpu_interrupt_state state)
5672 {
5673         switch (state) {
5674         case AMDGPU_IRQ_STATE_DISABLE:
5675         case AMDGPU_IRQ_STATE_ENABLE:
5676                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5677                                PRIV_INSTR_INT_ENABLE,
5678                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5679         default:
5680                 break;
5681         }
5682
5683         return 0;
5684 }
5685
5686 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5687         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5688                         CP_ECC_ERROR_INT_ENABLE, 1)
5689
5690 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5691         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5692                         CP_ECC_ERROR_INT_ENABLE, 0)
5693
5694 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5695                                               struct amdgpu_irq_src *source,
5696                                               unsigned type,
5697                                               enum amdgpu_interrupt_state state)
5698 {
5699         switch (state) {
5700         case AMDGPU_IRQ_STATE_DISABLE:
5701                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5702                                 CP_ECC_ERROR_INT_ENABLE, 0);
5703                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5704                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5705                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5706                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5707                 break;
5708
5709         case AMDGPU_IRQ_STATE_ENABLE:
5710                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5711                                 CP_ECC_ERROR_INT_ENABLE, 1);
5712                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5713                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5714                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5715                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5716                 break;
5717         default:
5718                 break;
5719         }
5720
5721         return 0;
5722 }
5723
5724
5725 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5726                                             struct amdgpu_irq_src *src,
5727                                             unsigned type,
5728                                             enum amdgpu_interrupt_state state)
5729 {
5730         switch (type) {
5731         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5732                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5733                 break;
5734         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5735                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5736                 break;
5737         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5738                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5739                 break;
5740         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5741                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5742                 break;
5743         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5744                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5745                 break;
5746         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5747                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5748                 break;
5749         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5750                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5751                 break;
5752         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5753                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5754                 break;
5755         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5756                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5757                 break;
5758         default:
5759                 break;
5760         }
5761         return 0;
5762 }
5763
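/*
 * An EOP interrupt encodes its source ring in ring_id: bits [3:2] are
 * the ME, bits [1:0] the pipe and bits [6:4] the queue.  ME 0 is the
 * GFX ring; MEs 1 and 2 are the compute MECs, matched against each
 * compute ring's me/pipe/queue triple.
 */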
5764 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5765                             struct amdgpu_irq_src *source,
5766                             struct amdgpu_iv_entry *entry)
5767 {
5768         int i;
5769         u8 me_id, pipe_id, queue_id;
5770         struct amdgpu_ring *ring;
5771
5772         DRM_DEBUG("IH: CP EOP\n");
5773         me_id = (entry->ring_id & 0x0c) >> 2;
5774         pipe_id = (entry->ring_id & 0x03) >> 0;
5775         queue_id = (entry->ring_id & 0x70) >> 4;
5776
5777         switch (me_id) {
5778         case 0:
5779                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5780                 break;
5781         case 1:
5782         case 2:
5783                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5784                         ring = &adev->gfx.compute_ring[i];
5785                         /* Per-queue interrupt is supported for MEC starting from VI.
5786                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5787                          */
5788                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5789                                 amdgpu_fence_process(ring);
5790                 }
5791                 break;
5792         }
5793         return 0;
5794 }
5795
5796 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5797                            struct amdgpu_iv_entry *entry)
5798 {
5799         u8 me_id, pipe_id, queue_id;
5800         struct amdgpu_ring *ring;
5801         int i;
5802
5803         me_id = (entry->ring_id & 0x0c) >> 2;
5804         pipe_id = (entry->ring_id & 0x03) >> 0;
5805         queue_id = (entry->ring_id & 0x70) >> 4;
5806
5807         switch (me_id) {
5808         case 0:
5809                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5810                 break;
5811         case 1:
5812         case 2:
5813                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5814                         ring = &adev->gfx.compute_ring[i];
5815                         if (ring->me == me_id && ring->pipe == pipe_id &&
5816                             ring->queue == queue_id)
5817                                 drm_sched_fault(&ring->sched);
5818                 }
5819                 break;
5820         }
5821 }
5822
5823 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5824                                  struct amdgpu_irq_src *source,
5825                                  struct amdgpu_iv_entry *entry)
5826 {
5827         DRM_ERROR("Illegal register access in command stream\n");
5828         gfx_v9_0_fault(adev, entry);
5829         return 0;
5830 }
5831
5832 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5833                                   struct amdgpu_irq_src *source,
5834                                   struct amdgpu_iv_entry *entry)
5835 {
5836         DRM_ERROR("Illegal instruction in command stream\n");
5837         gfx_v9_0_fault(adev, entry);
5838         return 0;
5839 }
5840
5841
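/*
 * RAS error-counter map: one entry per EDC counter, pairing the counter
 * register with its SEC (correctable) and DED (uncorrectable) count
 * fields.  Entries whose hardware only reports a single SED count leave
 * the DED field zeroed.
 */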
5842 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5843         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5844           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5845           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5846         },
5847         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5848           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5849           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5850         },
5851         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5852           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5853           0, 0
5854         },
5855         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5856           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5857           0, 0
5858         },
5859         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5860           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5861           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5862         },
5863         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5864           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5865           0, 0
5866         },
5867         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5868           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5869           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5870         },
5871         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5872           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5873           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5874         },
5875         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5876           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5877           0, 0
5878         },
5879         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5880           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5881           0, 0
5882         },
5883         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5884           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5885           0, 0
5886         },
5887         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5888           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5889           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5890         },
5891         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5892           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5893           0, 0
5894         },
5895         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5896           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5897           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5898         },
5899         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5900           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5901           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5902           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5903         },
5904         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5905           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5906           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5907           0, 0
5908         },
5909         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5910           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5911           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5912           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5913         },
5914         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5915           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5916           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5917           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5918         },
5919         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5920           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5921           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5922           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5923         },
5924         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5925           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5926           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5927           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5928         },
5929         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5930           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5931           0, 0
5932         },
5933         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5934           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5935           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5936         },
5937         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5938           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5939           0, 0
5940         },
5941         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5942           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5943           0, 0
5944         },
5945         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5946           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5947           0, 0
5948         },
5949         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5950           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5951           0, 0
5952         },
5953         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5954           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5955           0, 0
5956         },
5957         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5958           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5959           0, 0
5960         },
5961         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5962           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5963           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5964         },
5965         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5966           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5967           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5968         },
5969         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5970           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5971           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5972         },
5973         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5974           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5975           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5976         },
5977         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5978           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5979           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5980         },
5981         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5982           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5983           0, 0
5984         },
5985         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5986           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5987           0, 0
5988         },
5989         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5990           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5991           0, 0
5992         },
5993         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5994           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5995           0, 0
5996         },
5997         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5998           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5999           0, 0
6000         },
6001         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6002           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6003           0, 0
6004         },
6005         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6006           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6007           0, 0
6008         },
6009         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6010           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6011           0, 0
6012         },
6013         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6014           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6015           0, 0
6016         },
6017         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6018           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6019           0, 0
6020         },
6021         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6022           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6023           0, 0
6024         },
6025         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6026           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6027           0, 0
6028         },
6029         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6030           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6031           0, 0
6032         },
6033         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6034           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6035           0, 0
6036         },
6037         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6038           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6039           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6040         },
6041         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6042           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6043           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6044         },
6045         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6046           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6047           0, 0
6048         },
6049         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6050           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6051           0, 0
6052         },
6053         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6054           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6055           0, 0
6056         },
6057         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6058           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6059           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6060         },
6061         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6062           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6063           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6064         },
6065         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6066           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6067           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6068         },
6069         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6070           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6071           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6072         },
6073         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6074           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6075           0, 0
6076         },
6077         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6078           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6079           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6080         },
6081         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6082           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6083           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6084         },
6085         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6086           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6087           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6088         },
6089         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6090           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6091           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6092         },
6093         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6094           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6095           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6096         },
6097         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6098           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6099           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6100         },
6101         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6102           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6103           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6104         },
6105         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6106           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6107           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6108         },
6109         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6110           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6111           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6112         },
6113         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6114           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6115           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6116         },
6117         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6118           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6119           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6120         },
6121         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6122           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6123           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6124         },
6125         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6126           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6127           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6128         },
6129         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6130           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6131           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6132         },
6133         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6134           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6135           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6136         },
6137         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6138           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6139           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6140         },
6141         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6142           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6143           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6144         },
6145         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6146           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6147           0, 0
6148         },
6149         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6150           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6151           0, 0
6152         },
6153         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6154           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6155           0, 0
6156         },
6157         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6158           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6159           0, 0
6160         },
6161         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6162           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6163           0, 0
6164         },
6165         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6166           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6167           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6168         },
6169         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6170           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6171           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6172         },
6173         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6174           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6175           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6176         },
6177         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6178           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6179           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6180         },
6181         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6182           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6183           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6184         },
6185         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6186           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6187           0, 0
6188         },
6189         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6190           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6191           0, 0
6192         },
6193         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6194           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6195           0, 0
6196         },
6197         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6198           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6199           0, 0
6200         },
6201         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6202           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6203           0, 0
6204         },
6205         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6206           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6207           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6208         },
6209         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6210           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6211           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6212         },
6213         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6214           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6215           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6216         },
6217         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6218           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6219           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6220         },
6221         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6222           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6223           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6224         },
6225         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6226           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6227           0, 0
6228         },
6229         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6230           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6231           0, 0
6232         },
6233         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6234           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6235           0, 0
6236         },
6237         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6238           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6239           0, 0
6240         },
6241         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6242           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6243           0, 0
6244         },
6245         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6246           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6247           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6248         },
6249         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6250           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6251           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6252         },
6253         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6254           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6255           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6256         },
6257         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6258           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6259           0, 0
6260         },
6261         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6262           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6263           0, 0
6264         },
6265         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6266           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6267           0, 0
6268         },
6269         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6270           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6271           0, 0
6272         },
6273         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6274           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6275           0, 0
6276         },
6277         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6278           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6279           0, 0
6280         }
6281 };
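/*
 * Field naming in the table above follows the hardware ECC counters:
 * SEC = single-bit error corrected, DED = double-bit error detected,
 * SED = single-bit error detected (parity-only memories, no correction).
 */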
6282
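/*
 * Inject a RAS error through the PSP RAS TA.  The request is first
 * validated against the subblock table above: the requested error type
 * must be covered by both the hardware- and driver-supported type
 * masks.  It is then translated into the TA's block/subblock encoding
 * and handed to psp_ras_trigger_error().  grbm_idx_mutex is held across
 * the call, presumably because the injection can retarget
 * GRBM_GFX_INDEX.
 */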
6283 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6284                                      void *inject_if)
6285 {
6286         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6287         int ret;
6288         struct ta_ras_trigger_error_input block_info = { 0 };
6289
6290         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6291                 return -EINVAL;
6292
6293         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6294                 return -EINVAL;
6295
6296         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6297                 return -EPERM;
6298
6299         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6300               info->head.type)) {
6301                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6302                         ras_gfx_subblocks[info->head.sub_block_index].name,
6303                         info->head.type);
6304                 return -EPERM;
6305         }
6306
6307         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6308               info->head.type)) {
6309                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6310                         ras_gfx_subblocks[info->head.sub_block_index].name,
6311                         info->head.type);
6312                 return -EPERM;
6313         }
6314
6315         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6316         block_info.sub_block_index =
6317                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6318         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6319         block_info.address = info->address;
6320         block_info.value = info->value;
6321
6322         mutex_lock(&adev->grbm_idx_mutex);
6323         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6324         mutex_unlock(&adev->grbm_idx_mutex);
6325
6326         return ret;
6327 }
6328
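/*
 * Names of the UTC memory instances covered by the EDC status query
 * below.  The array index of each name doubles as the value written to
 * the corresponding *_ECC_INDEX / *_EDC_INDEX register.
 */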
6329 static const char *vml2_mems[] = {
6330         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6331         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6332         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6333         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6334         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6335         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6336         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6337         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6338         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6339         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6340         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6341         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6342         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6343         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6344         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6345         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6346 };
6347
6348 static const char *vml2_walker_mems[] = {
6349         "UTC_VML2_CACHE_PDE0_MEM0",
6350         "UTC_VML2_CACHE_PDE0_MEM1",
6351         "UTC_VML2_CACHE_PDE1_MEM0",
6352         "UTC_VML2_CACHE_PDE1_MEM1",
6353         "UTC_VML2_CACHE_PDE2_MEM0",
6354         "UTC_VML2_CACHE_PDE2_MEM1",
6355         "UTC_VML2_RDIF_LOG_FIFO",
6356 };
6357
6358 static const char *atc_l2_cache_2m_mems[] = {
6359         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6360         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6361         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6362         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6363 };
6364
6365 static const char *atc_l2_cache_4k_mems[] = {
6366         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6367         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6368         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6369         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6370         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6371         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6372         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6373         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6374         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6375         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6376         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6377         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6378         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6379         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6380         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6381         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6382         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6383         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6384         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6385         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6386         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6387         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6388         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6389         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6390         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6391         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6392         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6393         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6394         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6395         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6396         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6397         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6398 };
6399
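/*
 * Query the UTC (VML2 and ATC L2) EDC status.  These counters sit
 * behind index/data register pairs: write the instance number to the
 * *_INDEX register, then read the counts back from the *_CNT register.
 * The counters are zeroed and the index parked at 255 around each sweep.
 */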
6400 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6401                                          struct ras_err_data *err_data)
6402 {
6403         uint32_t i, data;
6404         uint32_t sec_count, ded_count;
6405
6406         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6407         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6408         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6409         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6410         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6411         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6412         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6413         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6414
6415         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6416                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6417                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6418
6419                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6420                 if (sec_count) {
6421                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6422                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6423                         err_data->ce_count += sec_count;
6424                 }
6425
6426                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6427                 if (ded_count) {
6428                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6429                                 "DED %d\n", i, vml2_mems[i], ded_count);
6430                         err_data->ue_count += ded_count;
6431                 }
6432         }
6433
6434         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6435                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6436                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6437
6438                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6439                                                 SEC_COUNT);
6440                 if (sec_count) {
6441                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6442                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6443                         err_data->ce_count += sec_count;
6444                 }
6445
6446                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6447                                                 DED_COUNT);
6448                 if (ded_count) {
6449                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6450                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6451                         err_data->ue_count += ded_count;
6452                 }
6453         }
6454
6455         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6456                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6457                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6458
6459                 sec_count = (data & 0x00006000L) >> 0xd; /* bits 14:13: SEC count */
6460                 if (sec_count) {
6461                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6462                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6463                                 sec_count);
6464                         err_data->ce_count += sec_count;
6465                 }
6466         }
6467
6468         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6469                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6470                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6471
6472                 sec_count = (data & 0x00006000L) >> 0xd; /* bits 14:13: SEC count */
6473                 if (sec_count) {
6474                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6475                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6476                                 sec_count);
6477                         err_data->ce_count += sec_count;
6478                 }
6479
6480                 ded_count = (data & 0x00018000L) >> 0xf; /* bits 16:15: DED count */
6481                 if (ded_count) {
6482                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6483                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6484                                 ded_count);
6485                         err_data->ue_count += ded_count;
6486                 }
6487         }
6488
6489         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6490         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6491         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6492         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6493
6494         return 0;
6495 }
6496
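/*
 * Decode one raw EDC counter value: every gfx_v9_0_ras_fields entry
 * that targets this register contributes its mask/shift-extracted SEC
 * and DED counts to the caller's running totals.
 */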
6497 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6498         const struct soc15_reg_entry *reg,
6499         uint32_t se_id, uint32_t inst_id, uint32_t value,
6500         uint32_t *sec_count, uint32_t *ded_count)
6501 {
6502         uint32_t i;
6503         uint32_t sec_cnt, ded_cnt;
6504
6505         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6506                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6507                     gfx_v9_0_ras_fields[i].seg != reg->seg ||
6508                     gfx_v9_0_ras_fields[i].inst != reg->inst)
6509                         continue;
6510
6511                 sec_cnt = (value &
6512                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6513                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6514                 if (sec_cnt) {
6515                         dev_info(adev->dev, "GFX SubBlock %s, "
6516                                 "Instance[%d][%d], SEC %d\n",
6517                                 gfx_v9_0_ras_fields[i].name,
6518                                 se_id, inst_id,
6519                                 sec_cnt);
6520                         *sec_count += sec_cnt;
6521                 }
6522
6523                 ded_cnt = (value &
6524                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6525                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6526                 if (ded_cnt) {
6527                         dev_info(adev->dev, "GFX SubBlock %s, "
6528                                 "Instance[%d][%d], DED %d\n",
6529                                 gfx_v9_0_ras_fields[i].name,
6530                                 se_id, inst_id,
6531                                 ded_cnt);
6532                         *ded_count += ded_cnt;
6533                 }
6534         }
6535
6536         return 0;
6537 }
6538
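/*
 * The EDC counters are read-to-clear, so resetting them amounts to a
 * dummy read of every counter for every SE/instance combination plus a
 * zeroing sweep of the index/data-style UTC counters.
 */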
6539 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6540 {
6541         int i, j, k;
6542
6543         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6544                 return;
6545
6546         /* read back registers to clear the counters */
6547         mutex_lock(&adev->grbm_idx_mutex);
6548         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6549                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6550                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6551                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6552                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6553                         }
6554                 }
6555         }
6556         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6557         mutex_unlock(&adev->grbm_idx_mutex);
6558
6559         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6560         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6561         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6562         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6563         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6564         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6565         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6566         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6567
6568         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6569                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6570                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6571         }
6572
6573         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6574                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6575                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6576         }
6577
6578         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6579                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6580                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6581         }
6582
6583         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6584                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6585                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6586         }
6587
6588         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6589         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6590         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6591         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6592 }
6593
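/*
 * Top-level GFX RAS error count query: under grbm_idx_mutex, select
 * each SE/instance in turn, read every EDC counter register, decode any
 * non-zero value through gfx_v9_0_ras_error_count(), then fold in the
 * UTC status.
 */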
6594 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6595                                           void *ras_error_status)
6596 {
6597         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6598         uint32_t sec_count = 0, ded_count = 0;
6599         uint32_t i, j, k;
6600         uint32_t reg_value;
6601
6602         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6603                 return -EINVAL;
6604
6605         err_data->ue_count = 0;
6606         err_data->ce_count = 0;
6607
6608         mutex_lock(&adev->grbm_idx_mutex);
6609
6610         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6611                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6612                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6613                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6614                                 reg_value =
6615                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6616                                 if (reg_value)
6617                                         gfx_v9_0_ras_error_count(adev,
6618                                                 &gfx_v9_0_edc_counter_regs[i],
6619                                                 j, k, reg_value,
6620                                                 &sec_count, &ded_count);
6621                         }
6622                 }
6623         }
6624
6625         err_data->ce_count += sec_count;
6626         err_data->ue_count += ded_count;
6627
6628         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6629         mutex_unlock(&adev->grbm_idx_mutex);
6630
6631         gfx_v9_0_query_utc_edc_status(adev, err_data);
6632
6633         return 0;
6634 }
6635
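/*
 * Full pipeline cache maintenance via a single ACQUIRE_MEM packet:
 * invalidate the shader instruction and scalar (K$) caches and TCL1,
 * and write back and invalidate the TC, across the entire address
 * range (COHER_SIZE all ones, COHER_BASE zero).
 */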
6636 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6637 {
6638         const unsigned int cp_coher_cntl =
6639                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6640                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6641                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6642                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6643                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6644
6645         /* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
6646         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6647         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6648         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6649         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6650         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6651         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6652         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6653 }
6654
6655 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6656         .name = "gfx_v9_0",
6657         .early_init = gfx_v9_0_early_init,
6658         .late_init = gfx_v9_0_late_init,
6659         .sw_init = gfx_v9_0_sw_init,
6660         .sw_fini = gfx_v9_0_sw_fini,
6661         .hw_init = gfx_v9_0_hw_init,
6662         .hw_fini = gfx_v9_0_hw_fini,
6663         .suspend = gfx_v9_0_suspend,
6664         .resume = gfx_v9_0_resume,
6665         .is_idle = gfx_v9_0_is_idle,
6666         .wait_for_idle = gfx_v9_0_wait_for_idle,
6667         .soft_reset = gfx_v9_0_soft_reset,
6668         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6669         .set_powergating_state = gfx_v9_0_set_powergating_state,
6670         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6671 };
6672
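/*
 * Note on emit_frame_size below: it is the worst-case dword budget for
 * the packets emitted around one submission; each addend is the size of
 * one packet, and the sum is used to reserve ring space up front.
 */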
6673 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6674         .type = AMDGPU_RING_TYPE_GFX,
6675         .align_mask = 0xff,
6676         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6677         .support_64bit_ptrs = true,
6678         .vmhub = AMDGPU_GFXHUB_0,
6679         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6680         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6681         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6682         .emit_frame_size = /* 242 dwords maximum in total, assuming 16 IBs */
6683                 5 +  /* COND_EXEC */
6684                 7 +  /* PIPELINE_SYNC */
6685                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6686                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6687                 2 + /* VM_FLUSH */
6688                 8 +  /* FENCE for VM_FLUSH */
6689                 20 + /* GDS switch */
6690                 4 + /* double SWITCH_BUFFER,
6691                        the first COND_EXEC jumps to the place just
6692                        prior to this double SWITCH_BUFFER */
6693                 5 + /* COND_EXEC */
6694                 7 + /* HDP_flush */
6695                 4 + /* VGT_flush */
6696                 14 + /* CE_META */
6697                 31 + /* DE_META */
6698                 3 + /* CNTX_CTRL */
6699                 5 + /* HDP_INVL */
6700                 8 + 8 + /* FENCE x2 */
6701                 2 + /* SWITCH_BUFFER */
6702                 7, /* gfx_v9_0_emit_mem_sync */
6703         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6704         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6705         .emit_fence = gfx_v9_0_ring_emit_fence,
6706         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6707         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6708         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6709         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6710         .test_ring = gfx_v9_0_ring_test_ring,
6711         .test_ib = gfx_v9_0_ring_test_ib,
6712         .insert_nop = amdgpu_ring_insert_nop,
6713         .pad_ib = amdgpu_ring_generic_pad_ib,
6714         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6715         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6716         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6717         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6718         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6719         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6720         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6721         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6722         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6723         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6724 };
6725
6726 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6727         .type = AMDGPU_RING_TYPE_COMPUTE,
6728         .align_mask = 0xff,
6729         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6730         .support_64bit_ptrs = true,
6731         .vmhub = AMDGPU_GFXHUB_0,
6732         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6733         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6734         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6735         .emit_frame_size =
6736                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6737                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6738                 5 + /* hdp invalidate */
6739                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6740                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6741                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6742                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6743                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6744                 7, /* gfx_v9_0_emit_mem_sync */
6745         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6746         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6747         .emit_fence = gfx_v9_0_ring_emit_fence,
6748         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6749         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6750         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6751         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6752         .test_ring = gfx_v9_0_ring_test_ring,
6753         .test_ib = gfx_v9_0_ring_test_ib,
6754         .insert_nop = amdgpu_ring_insert_nop,
6755         .pad_ib = amdgpu_ring_generic_pad_ib,
6756         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6757         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6758         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6759         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6760 };
6761
6762 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6763         .type = AMDGPU_RING_TYPE_KIQ,
6764         .align_mask = 0xff,
6765         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6766         .support_64bit_ptrs = true,
6767         .vmhub = AMDGPU_GFXHUB_0,
6768         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6769         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6770         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6771         .emit_frame_size =
6772                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6773                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6774                 5 + /* hdp invalidate */
6775                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6776                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6777                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6778                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6779                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6780         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6781         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6782         .test_ring = gfx_v9_0_ring_test_ring,
6783         .insert_nop = amdgpu_ring_insert_nop,
6784         .pad_ib = amdgpu_ring_generic_pad_ib,
6785         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6786         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6787         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6788         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6789 };
6790
6791 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6792 {
6793         int i;
6794
6795         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6796
6797         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6798                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6799
6800         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6801                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6802 }
6803
6804 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6805         .set = gfx_v9_0_set_eop_interrupt_state,
6806         .process = gfx_v9_0_eop_irq,
6807 };
6808
6809 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6810         .set = gfx_v9_0_set_priv_reg_fault_state,
6811         .process = gfx_v9_0_priv_reg_irq,
6812 };
6813
6814 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6815         .set = gfx_v9_0_set_priv_inst_fault_state,
6816         .process = gfx_v9_0_priv_inst_irq,
6817 };
6818
6819 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6820         .set = gfx_v9_0_set_cp_ecc_error_state,
6821         .process = amdgpu_gfx_cp_ecc_error_irq,
6822 };
6823
6825 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6826 {
6827         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6828         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6829
6830         adev->gfx.priv_reg_irq.num_types = 1;
6831         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6832
6833         adev->gfx.priv_inst_irq.num_types = 1;
6834         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6835
6836         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6837         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6838 }
6839
6840 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6841 {
6842         switch (adev->asic_type) {
6843         case CHIP_VEGA10:
6844         case CHIP_VEGA12:
6845         case CHIP_VEGA20:
6846         case CHIP_RAVEN:
6847         case CHIP_ARCTURUS:
6848         case CHIP_RENOIR:
6849                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6850                 break;
6851         default:
6852                 break;
6853         }
6854 }
6855
6856 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6857 {
6858         /* init asic gds info */
6859         switch (adev->asic_type) {
6860         case CHIP_VEGA10:
6861         case CHIP_VEGA12:
6862         case CHIP_VEGA20:
6863                 adev->gds.gds_size = 0x10000;
6864                 break;
6865         case CHIP_RAVEN:
6866         case CHIP_ARCTURUS:
6867                 adev->gds.gds_size = 0x1000;
6868                 break;
6869         default:
6870                 adev->gds.gds_size = 0x10000;
6871                 break;
6872         }
6873
6874         switch (adev->asic_type) {
6875         case CHIP_VEGA10:
6876         case CHIP_VEGA20:
6877                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6878                 break;
6879         case CHIP_VEGA12:
6880                 adev->gds.gds_compute_max_wave_id = 0x27f;
6881                 break;
6882         case CHIP_RAVEN:
6883                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
6884                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6885                 else
6886                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6887                 break;
6888         case CHIP_ARCTURUS:
6889                 adev->gds.gds_compute_max_wave_id = 0xfff;
6890                 break;
6891         default:
6892                 /* this really depends on the chip */
6893                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6894                 break;
6895         }
6896
6897         adev->gds.gws_size = 64;
6898         adev->gds.oa_size = 16;
6899 }
6900
6901 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6902                                                  u32 bitmap)
6903 {
6904         u32 data;
6905
6906         if (!bitmap)
6907                 return;
6908
6909         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6910         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6911
6912         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6913 }
6914
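/*
 * Active-CU mask for the currently selected SE/SH: the union of the
 * hardware (CC_*) and user (GC_USER_*) inactive-CU bits, complemented
 * and clipped to max_cu_per_sh bits.
 */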
6915 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6916 {
6917         u32 data, mask;
6918
6919         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6920         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6921
6922         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6923         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6924
6925         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6926
6927         return (~data) & mask;
6928 }
6929
6930 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6931                                  struct amdgpu_cu_info *cu_info)
6932 {
6933         int i, j, k, counter, active_cu_number = 0;
6934         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6935         unsigned disable_masks[4 * 4];
6936
6937         if (!adev || !cu_info)
6938                 return -EINVAL;
6939
6940         /*
6941          * The 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs.
6942          */
6943         if (adev->gfx.config.max_shader_engines *
6944                 adev->gfx.config.max_sh_per_se > 16)
6945                 return -EINVAL;
6946
6947         amdgpu_gfx_parse_disable_cu(disable_masks,
6948                                     adev->gfx.config.max_shader_engines,
6949                                     adev->gfx.config.max_sh_per_se);
6950
6951         mutex_lock(&adev->grbm_idx_mutex);
6952         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6953                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6954                         mask = 1;
6955                         ao_bitmap = 0;
6956                         counter = 0;
6957                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6958                         gfx_v9_0_set_user_cu_inactive_bitmap(
6959                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6960                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6961
6962                         /*
6963                          * The bitmap (and ao_cu_bitmap) in the cu_info
6964                          * structure is a 4x4 array, which suits Vega
6965                          * ASICs with their 4*2 SE/SH layout.
6966                          * Arcturus, however, changes the SE/SH layout to 8*1.
6967                          * To minimize the impact, we fold it into the
6968                          * current bitmap array as below:
6969                          *    SE4,SH0 --> bitmap[0][1]
6970                          *    SE5,SH0 --> bitmap[1][1]
6971                          *    SE6,SH0 --> bitmap[2][1]
6972                          *    SE7,SH0 --> bitmap[3][1]
6973                          */
6974                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6975
6976                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6977                                 if (bitmap & mask) {
6978                                         if (counter < adev->gfx.config.max_cu_per_sh)
6979                                                 ao_bitmap |= mask;
6980                                         counter++;
6981                                 }
6982                                 mask <<= 1;
6983                         }
6984                         active_cu_number += counter;
6985                         if (i < 2 && j < 2)
6986                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6987                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6988                 }
6989         }
6990         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6991         mutex_unlock(&adev->grbm_idx_mutex);
6992
6993         cu_info->number = active_cu_number;
6994         cu_info->ao_cu_mask = ao_cu_mask;
6995         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6996
6997         return 0;
6998 }
6999
7000 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = {
7002         .type = AMD_IP_BLOCK_TYPE_GFX,
7003         .major = 9,
7004         .minor = 0,
7005         .rev = 0,
7006         .funcs = &gfx_v9_0_ip_funcs,
7007 };