drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

struct ras_gfx_subblock_reg {
        const char *name;
        uint32_t hwip;
        uint32_t inst;
        uint32_t seg;
        uint32_t reg_offset;
        uint32_t sec_count_mask;
        uint32_t sec_count_shift;
        uint32_t ded_count_mask;
        uint32_t ded_count_shift;
};

enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }
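
/*
 * The two packed words appear to follow the TA_RAS_ERROR__* bit layout
 * used by the RAS TA (an assumption, not spelled out in this file):
 * bit 0 = parity, bit 1 = single-correctable, bit 2 = multi-uncorrectable,
 * bit 3 = poison. Under that reading, a worked example:
 *
 *         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 *
 * expands to hw_supported_error_type = 0xe (single-correctable,
 * multi-uncorrectable and poison reported by hardware) and
 * sw_supported_error_type = 0x6 (single-correctable and
 * multi-uncorrectable handled in software), matching the first entry of
 * the table below.
 */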

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

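/*
 * Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) entry in
 * the tables below is consumed by soc15_program_register_sequence(), which
 * performs a read-modify-write roughly like this (a sketch of the helper's
 * behaviour, not a verbatim copy of it):
 *
 *         tmp = RREG32(reg);
 *         tmp &= ~and_mask;
 *         tmp |= (or_mask & and_mask);
 *         WREG32(reg, tmp);
 *
 * so only the field bits selected by and_mask are replaced and the rest of
 * the register is preserved.
 */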
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
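
/*
 * The RLC_SRM_INDEX_CNTL_ADDR/DATA register instances are not assumed to be
 * contiguous, so the tables above record each instance's offset from the _0
 * register. A caller can then program pair i with something like this
 * (illustrative sketch only):
 *
 *         WREG32_SOC15_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0,
 *                             GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], addr);
 *         WREG32_SOC15_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0,
 *                             GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], data);
 */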

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        default:
                break;
        }

        if (adev->asic_type != CHIP_ARCTURUS)
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}
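
/*
 * The five dwords above form a single PM4 WRITE_DATA packet. For a register
 * write (WRITE_DATA_DST_SEL(0)) the layout is:
 *
 *         dw0: PACKET3(PACKET3_WRITE_DATA, 3)  - header; count 3 means
 *              four payload dwords follow
 *         dw1: engine select | dst select | write-confirm control bits
 *         dw2: destination register offset (in dwords)
 *         dw3: 0 (upper address bits, unused for register writes)
 *         dw4: value to write
 *
 * When wc (write confirm) is set, the CP waits for the write to land
 * before processing the next packet.
 */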

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}
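
/*
 * WAIT_REG_MEM stalls the selected engine until (*location & mask) == ref:
 * with mem_space = 0 the location is the register at offset addr0, with
 * mem_space = 1 it is the dword-aligned memory address held in addr0/addr1
 * (low/high bits). WAIT_REG_MEM_FUNCTION(3) selects the "equal" comparison,
 * and the final dword is the interval between polls.
 */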

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
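
/*
 * The ring test above is the usual smoke-test pattern: seed a scratch
 * register with 0xCAFEDEAD from the CPU, submit a SET_UCONFIG_REG packet
 * asking the CP to overwrite it with 0xDEADBEEF, then poll for up to
 * adev->usec_timeout microseconds. If the new value never appears, the
 * ring is not fetching and executing packets and -ETIMEDOUT is returned.
 */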

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}
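
/*
 * The IB test mirrors the ring test but exercises the indirect-buffer path:
 * the WRITE_DATA packet (WRITE_DATA_DST_SEL(5), i.e. a memory destination)
 * lives in an IB targeting a writeback slot, and completion is detected via
 * the fence returned by amdgpu_ib_schedule() rather than by busy-polling a
 * register, so a pass covers IB fetch, execution and fence signalling.
 */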

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        if ((adev->gfx.mec_fw_version < 0x000001a5) ||
            (adev->gfx.mec_feature_version < 46) ||
            (adev->gfx.pfp_fw_version < 0x000000b7) ||
            (adev->gfx.pfp_feature_version < 46))
                DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
                              "GRBM requires 1-cycle delay in cp firmware\n");
996
997         switch (adev->asic_type) {
998         case CHIP_VEGA10:
999                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1000                     (adev->gfx.me_feature_version >= 42) &&
1001                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1002                     (adev->gfx.pfp_feature_version >= 42))
1003                         adev->gfx.me_fw_write_wait = true;
1004
1005                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1006                     (adev->gfx.mec_feature_version >= 42))
1007                         adev->gfx.mec_fw_write_wait = true;
1008                 break;
1009         case CHIP_VEGA12:
1010                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1011                     (adev->gfx.me_feature_version >= 44) &&
1012                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1013                     (adev->gfx.pfp_feature_version >= 44))
1014                         adev->gfx.me_fw_write_wait = true;
1015
1016                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1017                     (adev->gfx.mec_feature_version >= 44))
1018                         adev->gfx.mec_fw_write_wait = true;
1019                 break;
1020         case CHIP_VEGA20:
1021                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1022                     (adev->gfx.me_feature_version >= 44) &&
1023                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1024                     (adev->gfx.pfp_feature_version >= 44))
1025                         adev->gfx.me_fw_write_wait = true;
1026
1027                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1028                     (adev->gfx.mec_feature_version >= 44))
1029                         adev->gfx.mec_fw_write_wait = true;
1030                 break;
1031         case CHIP_RAVEN:
1032                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1033                     (adev->gfx.me_feature_version >= 42) &&
1034                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1035                     (adev->gfx.pfp_feature_version >= 42))
1036                         adev->gfx.me_fw_write_wait = true;
1037
1038                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1039                     (adev->gfx.mec_feature_version >= 42))
1040                         adev->gfx.mec_fw_write_wait = true;
1041                 break;
1042         default:
1043                 break;
1044         }
1045 }
1046
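/*
 * Decide whether GFXOFF can stay enabled: it is masked off on ASIC and
 * firmware combinations known to be unstable, and where it survives on
 * an APU the matching CP/RLC powergating flags are enabled as well.
 */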
1047 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1048 {
1049         switch (adev->asic_type) {
1050         case CHIP_VEGA10:
1051         case CHIP_VEGA12:
1052         case CHIP_VEGA20:
1053                 break;
1054         case CHIP_RAVEN:
1055                 /* Disable GFXOFF on original raven.  There are combinations
1056                  * of sbios and platforms that are not stable.
1057                  */
                if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
                        adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1067
1068                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1069                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1070                                 AMD_PG_SUPPORT_CP |
1071                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1072                 break;
1073         case CHIP_RENOIR:
1074                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1075                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1076                                 AMD_PG_SUPPORT_CP |
1077                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1078                 break;
1079         default:
1080                 break;
1081         }
1082 }
1083
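/*
 * Fetch and validate the CP gfx-pipe microcode (PFP, ME, CE) and, when
 * PSP front-door loading is used, add each image to the ucode table
 * with its page-aligned size accounted in fw_size.
 */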
1084 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1085                                           const char *chip_name)
1086 {
1087         char fw_name[30];
1088         int err;
1089         struct amdgpu_firmware_info *info = NULL;
1090         const struct common_firmware_header *header = NULL;
1091         const struct gfx_firmware_header_v1_0 *cp_hdr;
1092
1093         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1094         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1095         if (err)
1096                 goto out;
1097         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1098         if (err)
1099                 goto out;
1100         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1101         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1102         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1103
1104         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1105         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1106         if (err)
1107                 goto out;
1108         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1109         if (err)
1110                 goto out;
1111         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1112         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1113         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1114
1115         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1116         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1117         if (err)
1118                 goto out;
1119         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1120         if (err)
1121                 goto out;
1122         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1123         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1124         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1125
1126         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1127                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1128                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1129                 info->fw = adev->gfx.pfp_fw;
1130                 header = (const struct common_firmware_header *)info->fw->data;
1131                 adev->firmware.fw_size +=
1132                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1133
1134                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1135                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1136                 info->fw = adev->gfx.me_fw;
1137                 header = (const struct common_firmware_header *)info->fw->data;
1138                 adev->firmware.fw_size +=
1139                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1140
1141                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1142                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1143                 info->fw = adev->gfx.ce_fw;
1144                 header = (const struct common_firmware_header *)info->fw->data;
1145                 adev->firmware.fw_size +=
1146                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1147         }
1148
1149 out:
1150         if (err) {
1151                 dev_err(adev->dev,
1152                         "gfx9: Failed to load firmware \"%s\"\n",
1153                         fw_name);
1154                 release_firmware(adev->gfx.pfp_fw);
1155                 adev->gfx.pfp_fw = NULL;
1156                 release_firmware(adev->gfx.me_fw);
1157                 adev->gfx.me_fw = NULL;
1158                 release_firmware(adev->gfx.ce_fw);
1159                 adev->gfx.ce_fw = NULL;
1160         }
1161         return err;
1162 }
1163
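/*
 * Fetch and parse the RLC microcode.  Raven-family parts need special
 * casing: Picasso AM4 boards and newer ("kicker") Raven SKUs ship
 * dedicated RLC binaries.  Extended v2.1 header fields are handled by
 * gfx_v9_0_init_rlc_ext_microcode().
 */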
1164 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1165                                           const char *chip_name)
1166 {
1167         char fw_name[30];
1168         int err;
1169         struct amdgpu_firmware_info *info = NULL;
1170         const struct common_firmware_header *header = NULL;
1171         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1172         unsigned int *tmp = NULL;
1173         unsigned int i = 0;
1174         uint16_t version_major;
1175         uint16_t version_minor;
1176         uint32_t smu_version;
1177
1178         /*
1179          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1180          * instead of picasso_rlc.bin.
1181          * Judgment method:
1182          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1183          *          or revision >= 0xD8 && revision <= 0xDF
         * otherwise it is PCO FP5
1185          */
1186         if (!strcmp(chip_name, "picasso") &&
1187                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1188                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1189                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1190         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1191                 (smu_version >= 0x41e2b))
                /*
                 * SMC is loaded by SBIOS on APU and it's able to get the SMU
                 * version directly.
                 */
1195                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1196         else
1197                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1198         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1199         if (err)
1200                 goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
1202         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1203
1204         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1205         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1206         if (version_major == 2 && version_minor == 1)
1207                 adev->gfx.rlc.is_rlc_v2_1 = true;
1208
1209         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1210         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1211         adev->gfx.rlc.save_and_restore_offset =
1212                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1213         adev->gfx.rlc.clear_state_descriptor_offset =
1214                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1215         adev->gfx.rlc.avail_scratch_ram_locations =
1216                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1217         adev->gfx.rlc.reg_restore_list_size =
1218                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1219         adev->gfx.rlc.reg_list_format_start =
1220                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1221         adev->gfx.rlc.reg_list_format_separate_start =
1222                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1223         adev->gfx.rlc.starting_offsets_start =
1224                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1225         adev->gfx.rlc.reg_list_format_size_bytes =
1226                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1227         adev->gfx.rlc.reg_list_size_bytes =
1228                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1229         adev->gfx.rlc.register_list_format =
1230                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1231                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1232         if (!adev->gfx.rlc.register_list_format) {
1233                 err = -ENOMEM;
1234                 goto out;
1235         }
1236
1237         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1238                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1239         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1240                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1241
1242         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1243
1244         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1245                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1246         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1247                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1248
1249         if (adev->gfx.rlc.is_rlc_v2_1)
1250                 gfx_v9_0_init_rlc_ext_microcode(adev);
1251
1252         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1253                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1254                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1255                 info->fw = adev->gfx.rlc_fw;
1256                 header = (const struct common_firmware_header *)info->fw->data;
1257                 adev->firmware.fw_size +=
1258                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1259
1260                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1261                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1262                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1263                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1264                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1265                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1266                         info->fw = adev->gfx.rlc_fw;
1267                         adev->firmware.fw_size +=
1268                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1269
1270                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1271                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1272                         info->fw = adev->gfx.rlc_fw;
1273                         adev->firmware.fw_size +=
1274                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1275
1276                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1277                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1278                         info->fw = adev->gfx.rlc_fw;
1279                         adev->firmware.fw_size +=
1280                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1281                 }
1282         }
1283
1284 out:
1285         if (err) {
1286                 dev_err(adev->dev,
1287                         "gfx9: Failed to load firmware \"%s\"\n",
1288                         fw_name);
1289                 release_firmware(adev->gfx.rlc_fw);
1290                 adev->gfx.rlc_fw = NULL;
1291         }
1292         return err;
1293 }
1294
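/*
 * Fetch and validate the compute (MEC) microcode.  MEC2 firmware is
 * optional: a failed request simply leaves mec2_fw unset rather than
 * failing init.  The gfxoff and fw_write_wait checks run from the out:
 * path once all CP firmware versions are known.
 */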
1295 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1296                                           const char *chip_name)
1297 {
1298         char fw_name[30];
1299         int err;
1300         struct amdgpu_firmware_info *info = NULL;
1301         const struct common_firmware_header *header = NULL;
1302         const struct gfx_firmware_header_v1_0 *cp_hdr;
1303
1304         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1305         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1306         if (err)
1307                 goto out;
1308         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1309         if (err)
1310                 goto out;
1311         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1312         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1313         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1314
1316         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1317         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1318         if (!err) {
1319                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1320                 if (err)
1321                         goto out;
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                        adev->gfx.mec2_fw->data;
                adev->gfx.mec2_fw_version =
                        le32_to_cpu(cp_hdr->header.ucode_version);
                adev->gfx.mec2_feature_version =
                        le32_to_cpu(cp_hdr->ucode_feature_version);
1328         } else {
1329                 err = 0;
1330                 adev->gfx.mec2_fw = NULL;
1331         }
1332
1333         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1334                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1335                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1336                 info->fw = adev->gfx.mec_fw;
1337                 header = (const struct common_firmware_header *)info->fw->data;
1338                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1339                 adev->firmware.fw_size +=
1340                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1341
1342                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1343                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1344                 info->fw = adev->gfx.mec_fw;
1345                 adev->firmware.fw_size +=
1346                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1347
1348                 if (adev->gfx.mec2_fw) {
1349                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1350                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1351                         info->fw = adev->gfx.mec2_fw;
1352                         header = (const struct common_firmware_header *)info->fw->data;
1353                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1354                         adev->firmware.fw_size +=
1355                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1356
                        /*
                         * TODO: Determine if MEC2 JT FW loading can be
                         * removed for all GFX V9 asics and above.
                         */
1359                         if (adev->asic_type != CHIP_ARCTURUS &&
1360                             adev->asic_type != CHIP_RENOIR) {
1361                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1362                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1363                                 info->fw = adev->gfx.mec2_fw;
1364                                 adev->firmware.fw_size +=
1365                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1366                                         PAGE_SIZE);
1367                         }
1368                 }
1369         }
1370
1371 out:
1372         gfx_v9_0_check_if_need_gfxoff(adev);
1373         gfx_v9_0_check_fw_write_wait(adev);
1374         if (err) {
1375                 dev_err(adev->dev,
1376                         "gfx9: Failed to load firmware \"%s\"\n",
1377                         fw_name);
1378                 release_firmware(adev->gfx.mec_fw);
1379                 adev->gfx.mec_fw = NULL;
1380                 release_firmware(adev->gfx.mec2_fw);
1381                 adev->gfx.mec2_fw = NULL;
1382         }
1383         return err;
1384 }
1385
1386 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1387 {
1388         const char *chip_name;
1389         int r;
1390
1391         DRM_DEBUG("\n");
1392
1393         switch (adev->asic_type) {
1394         case CHIP_VEGA10:
1395                 chip_name = "vega10";
1396                 break;
1397         case CHIP_VEGA12:
1398                 chip_name = "vega12";
1399                 break;
1400         case CHIP_VEGA20:
1401                 chip_name = "vega20";
1402                 break;
1403         case CHIP_RAVEN:
1404                 if (adev->rev_id >= 8)
1405                         chip_name = "raven2";
1406                 else if (adev->pdev->device == 0x15d8)
1407                         chip_name = "picasso";
1408                 else
1409                         chip_name = "raven";
1410                 break;
1411         case CHIP_ARCTURUS:
1412                 chip_name = "arcturus";
1413                 break;
1414         case CHIP_RENOIR:
1415                 chip_name = "renoir";
1416                 break;
1417         default:
1418                 BUG();
1419         }
1420
1421         /* No CPG in Arcturus */
1422         if (adev->asic_type != CHIP_ARCTURUS) {
1423                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1424                 if (r)
1425                         return r;
1426         }
1427
1428         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1429         if (r)
1430                 return r;
1431
1432         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1433         if (r)
1434                 return r;
1435
1436         return r;
1437 }
1438
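/*
 * Size of the clear-state indirect buffer in dwords: 2 for the PREAMBLE
 * begin, 3 for CONTEXT_CONTROL, 2 + reg_count for every SECT_CONTEXT
 * extent (SET_CONTEXT_REG header plus register offset), 2 for the
 * PREAMBLE end and 2 for the final CLEAR_STATE packet.  This mirrors
 * exactly what gfx_v9_0_get_csb_buffer() emits.
 */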
1439 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1440 {
1441         u32 count = 0;
1442         const struct cs_section_def *sect = NULL;
1443         const struct cs_extent_def *ext = NULL;
1444
1445         /* begin clear state */
1446         count += 2;
1447         /* context control state */
1448         count += 3;
1449
1450         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1451                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1452                         if (sect->id == SECT_CONTEXT)
1453                                 count += 2 + ext->reg_count;
1454                         else
1455                                 return 0;
1456                 }
1457         }
1458
1459         /* end clear state */
1460         count += 2;
1461         /* clear state */
1462         count += 2;
1463
1464         return count;
1465 }
1466
1467 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1468                                     volatile u32 *buffer)
1469 {
1470         u32 count = 0, i;
1471         const struct cs_section_def *sect = NULL;
1472         const struct cs_extent_def *ext = NULL;
1473
1474         if (adev->gfx.rlc.cs_data == NULL)
1475                 return;
1476         if (buffer == NULL)
1477                 return;
1478
1479         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1480         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1481
1482         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1483         buffer[count++] = cpu_to_le32(0x80000000);
1484         buffer[count++] = cpu_to_le32(0x80000000);
1485
1486         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1487                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1488                         if (sect->id == SECT_CONTEXT) {
1489                                 buffer[count++] =
1490                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1491                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1492                                                 PACKET3_SET_CONTEXT_REG_START);
1493                                 for (i = 0; i < ext->reg_count; i++)
1494                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1495                         } else {
1496                                 return;
1497                         }
1498                 }
1499         }
1500
1501         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1502         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1503
1504         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1505         buffer[count++] = cpu_to_le32(0);
1506 }
1507
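/*
 * Program the always-on CU masks used by RLC power gating: per SE/SH,
 * the first pg_always_on_cu_num CUs are written to
 * RLC_PG_ALWAYS_ON_CU_MASK and the first always_on_cu_num CUs to
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK, with the result cached in
 * cu_info->ao_cu_bitmap.
 */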
1508 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1509 {
1510         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1511         uint32_t pg_always_on_cu_num = 2;
1512         uint32_t always_on_cu_num;
1513         uint32_t i, j, k;
1514         uint32_t mask, cu_bitmap, counter;
1515
1516         if (adev->flags & AMD_IS_APU)
1517                 always_on_cu_num = 4;
1518         else if (adev->asic_type == CHIP_VEGA12)
1519                 always_on_cu_num = 8;
1520         else
1521                 always_on_cu_num = 12;
1522
1523         mutex_lock(&adev->grbm_idx_mutex);
1524         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1525                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1526                         mask = 1;
1527                         cu_bitmap = 0;
1528                         counter = 0;
1529                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1530
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1532                                 if (cu_info->bitmap[i][j] & mask) {
1533                                         if (counter == pg_always_on_cu_num)
1534                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1535                                         if (counter < always_on_cu_num)
1536                                                 cu_bitmap |= mask;
1537                                         else
1538                                                 break;
1539                                         counter++;
1540                                 }
1541                                 mask <<= 1;
1542                         }
1543
1544                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1545                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1546                 }
1547         }
1548         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549         mutex_unlock(&adev->grbm_idx_mutex);
1550 }
1551
1552 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1553 {
1554         uint32_t data;
1555
1556         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1557         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1558         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1559         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1560         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1561
1562         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1563         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1564
1565         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1566         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1567
1568         mutex_lock(&adev->grbm_idx_mutex);
1569         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1570         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1571         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1572
1573         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1574         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1575         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1576         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1577         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1578
1579         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1580         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1581         data &= 0x0000FFFF;
1582         data |= 0x00C00000;
1583         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1584
1585         /*
1586          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1587          * programmed in gfx_v9_0_init_always_on_cu_mask()
1588          */
1589
        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
         * used here as part of the RLC_LB_CNTL configuration */
1592         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1593         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1594         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1595         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1596         mutex_unlock(&adev->grbm_idx_mutex);
1597
1598         gfx_v9_0_init_always_on_cu_mask(adev);
1599 }
1600
1601 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1602 {
1603         uint32_t data;
1604
1605         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1606         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1607         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1608         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1609         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1610
1611         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1612         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1613
        /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1615         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1616
1617         mutex_lock(&adev->grbm_idx_mutex);
1618         /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1619         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1620         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1621
1622         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1623         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1624         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1625         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1626         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1627
1628         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1629         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1630         data &= 0x0000FFFF;
1631         data |= 0x00C00000;
1632         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1633
1634         /*
1635          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1636          * programmed in gfx_v9_0_init_always_on_cu_mask()
1637          */
1638
        /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
         * used here as part of the RLC_LB_CNTL configuration */
1641         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1642         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1643         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1644         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1645         mutex_unlock(&adev->grbm_idx_mutex);
1646
1647         gfx_v9_0_init_always_on_cu_mask(adev);
1648 }
1649
1650 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1651 {
1652         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1653 }
1654
1655 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1656 {
1657         return 5;
1658 }
1659
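/*
 * Set up the RLC objects: the clear-state buffer described by
 * gfx9_cs_data, a CP table (jump table + GDS) for Raven/Renoir, and the
 * LBPW programming on the ASICs that support it.
 */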
1660 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1661 {
1662         const struct cs_section_def *cs_data;
1663         int r;
1664
1665         adev->gfx.rlc.cs_data = gfx9_cs_data;
1666
1667         cs_data = adev->gfx.rlc.cs_data;
1668
1669         if (cs_data) {
1670                 /* init clear state block */
1671                 r = amdgpu_gfx_rlc_init_csb(adev);
1672                 if (r)
1673                         return r;
1674         }
1675
1676         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1677                 /* TODO: double check the cp_table_size for RV */
1678                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1679                 r = amdgpu_gfx_rlc_init_cpt(adev);
1680                 if (r)
1681                         return r;
1682         }
1683
1684         switch (adev->asic_type) {
1685         case CHIP_RAVEN:
1686                 gfx_v9_0_init_lbpw(adev);
1687                 break;
1688         case CHIP_VEGA20:
1689                 gfx_v9_4_init_lbpw(adev);
1690                 break;
1691         default:
1692                 break;
1693         }
1694
1695         return 0;
1696 }
1697
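/*
 * Pin the clear-state buffer object into VRAM and cache its GPU
 * address; the unpin path below drops the pin again on teardown.
 */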
1698 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1699 {
1700         int r;
1701
1702         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1703         if (unlikely(r != 0))
1704                 return r;
1705
1706         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1707                         AMDGPU_GEM_DOMAIN_VRAM);
1708         if (!r)
1709                 adev->gfx.rlc.clear_state_gpu_addr =
1710                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1711
1712         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1713
1714         return r;
1715 }
1716
1717 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1718 {
1719         int r;
1720
1721         if (!adev->gfx.rlc.clear_state_obj)
1722                 return;
1723
1724         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1725         if (likely(r == 0)) {
1726                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1727                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1728         }
1729 }
1730
1731 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1732 {
1733         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1734         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1735 }
1736
1737 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1738 {
1739         int r;
1740         u32 *hpd;
1741         const __le32 *fw_data;
1742         unsigned fw_size;
1743         u32 *fw;
1744         size_t mec_hpd_size;
1745
1746         const struct gfx_firmware_header_v1_0 *mec_hdr;
1747
1748         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1749
1750         /* take ownership of the relevant compute queues */
1751         amdgpu_gfx_compute_queue_acquire(adev);
1752         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1753
1754         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1755                                       AMDGPU_GEM_DOMAIN_VRAM,
1756                                       &adev->gfx.mec.hpd_eop_obj,
1757                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1758                                       (void **)&hpd);
1759         if (r) {
                dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1761                 gfx_v9_0_mec_fini(adev);
1762                 return r;
1763         }
1764
1765         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1766
1767         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1768         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1769
1770         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1771
1772         fw_data = (const __le32 *)
1773                 (adev->gfx.mec_fw->data +
1774                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

        r = amdgpu_bo_create_reserved(adev, fw_size,
1778                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1779                                       &adev->gfx.mec.mec_fw_obj,
1780                                       &adev->gfx.mec.mec_fw_gpu_addr,
1781                                       (void **)&fw);
1782         if (r) {
1783                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1784                 gfx_v9_0_mec_fini(adev);
1785                 return r;
1786         }
1787
1788         memcpy(fw, fw_data, fw_size);
1789
1790         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1791         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1792
1793         return 0;
1794 }
1795
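/*
 * Indirect wave-state access: SQ_IND_INDEX selects a register of a
 * given SIMD/wave (FORCE_READ forces the access through) and
 * SQ_IND_DATA returns the value.  wave_read_regs() adds AUTO_INCR to
 * stream a run of consecutive GPRs.
 */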
1796 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1797 {
1798         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1799                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1800                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1801                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1802                 (SQ_IND_INDEX__FORCE_READ_MASK));
1803         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1804 }
1805
1806 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1807                            uint32_t wave, uint32_t thread,
1808                            uint32_t regno, uint32_t num, uint32_t *out)
1809 {
1810         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1811                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1812                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1813                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1814                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1815                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1816                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1817         while (num--)
1818                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1819 }
1820
1821 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1822 {
1823         /* type 1 wave data */
1824         dst[(*no_fields)++] = 1;
1825         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1826         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1827         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1828         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1829         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1830         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1831         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1832         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1833         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1834         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1835         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1836         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1837         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1838         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1839 }
1840
1841 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1842                                      uint32_t wave, uint32_t start,
1843                                      uint32_t size, uint32_t *dst)
1844 {
1845         wave_read_regs(
1846                 adev, simd, wave, 0,
1847                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1848 }
1849
1850 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1851                                      uint32_t wave, uint32_t thread,
1852                                      uint32_t start, uint32_t size,
1853                                      uint32_t *dst)
1854 {
1855         wave_read_regs(
1856                 adev, simd, wave, thread,
1857                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1858 }
1859
1860 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1861                                   u32 me, u32 pipe, u32 q, u32 vm)
1862 {
1863         soc15_grbm_select(adev, me, pipe, q, vm);
1864 }
1865
1866 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1867         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1868         .select_se_sh = &gfx_v9_0_select_se_sh,
1869         .read_wave_data = &gfx_v9_0_read_wave_data,
1870         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1871         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1872         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1873         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1874         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1875 };
1876
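/*
 * Per-ASIC gfx configuration: fixed context/FIFO sizing plus the
 * GB_ADDR_CONFIG value (golden setting or read back from hardware),
 * which is then decoded into pipe/bank/SE counts below.
 */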
1877 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1878 {
1879         u32 gb_addr_config;
1880         int err;
1881
1882         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1883
1884         switch (adev->asic_type) {
1885         case CHIP_VEGA10:
1886                 adev->gfx.config.max_hw_contexts = 8;
1887                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1888                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1889                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1890                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1891                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1892                 break;
1893         case CHIP_VEGA12:
1894                 adev->gfx.config.max_hw_contexts = 8;
1895                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1896                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1897                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1898                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1899                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1900                 DRM_INFO("fix gfx.config for vega12\n");
1901                 break;
1902         case CHIP_VEGA20:
1903                 adev->gfx.config.max_hw_contexts = 8;
1904                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1905                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1906                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1907                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1908                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1909                 gb_addr_config &= ~0xf3e777ff;
1910                 gb_addr_config |= 0x22014042;
1911                 /* check vbios table if gpu info is not available */
1912                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1913                 if (err)
1914                         return err;
1915                 break;
1916         case CHIP_RAVEN:
1917                 adev->gfx.config.max_hw_contexts = 8;
1918                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1919                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1920                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1921                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1922                 if (adev->rev_id >= 8)
1923                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1924                 else
1925                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1926                 break;
1927         case CHIP_ARCTURUS:
1928                 adev->gfx.config.max_hw_contexts = 8;
1929                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1930                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1931                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1932                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1933                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1934                 gb_addr_config &= ~0xf3e777ff;
1935                 gb_addr_config |= 0x22014042;
1936                 break;
1937         case CHIP_RENOIR:
1938                 adev->gfx.config.max_hw_contexts = 8;
1939                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1940                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1941                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1942                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1943                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1944                 gb_addr_config &= ~0xf3e777ff;
1945                 gb_addr_config |= 0x22010042;
1946                 break;
1947         default:
1948                 BUG();
1949                 break;
1950         }
1951
1952         adev->gfx.config.gb_addr_config = gb_addr_config;
1953
1954         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1955                         REG_GET_FIELD(
1956                                         adev->gfx.config.gb_addr_config,
1957                                         GB_ADDR_CONFIG,
1958                                         NUM_PIPES);
1959
1960         adev->gfx.config.max_tile_pipes =
1961                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1962
1963         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1964                         REG_GET_FIELD(
1965                                         adev->gfx.config.gb_addr_config,
1966                                         GB_ADDR_CONFIG,
1967                                         NUM_BANKS);
1968         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1969                         REG_GET_FIELD(
1970                                         adev->gfx.config.gb_addr_config,
1971                                         GB_ADDR_CONFIG,
1972                                         MAX_COMPRESSED_FRAGS);
1973         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1974                         REG_GET_FIELD(
1975                                         adev->gfx.config.gb_addr_config,
1976                                         GB_ADDR_CONFIG,
1977                                         NUM_RB_PER_SE);
1978         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1979                         REG_GET_FIELD(
1980                                         adev->gfx.config.gb_addr_config,
1981                                         GB_ADDR_CONFIG,
1982                                         NUM_SHADER_ENGINES);
1983         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1984                         REG_GET_FIELD(
1985                                         adev->gfx.config.gb_addr_config,
1986                                         GB_ADDR_CONFIG,
1987                                         PIPE_INTERLEAVE_SIZE));
1988
1989         return 0;
1990 }
1991
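/*
 * Set up one compute ring.  MEC0 is exposed as ME1 (hence me = mec + 1),
 * each ring gets its own 64-bit doorbell slot (shifted left by one to
 * form a dword offset) and a GFX9_MEC_HPD_SIZE slice of the shared EOP
 * buffer, and the EOP interrupt is routed per me/pipe.
 */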
1992 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1993                                       int mec, int pipe, int queue)
1994 {
1995         int r;
1996         unsigned irq_type;
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2000
2001         /* mec0 is me1 */
2002         ring->me = mec + 1;
2003         ring->pipe = pipe;
2004         ring->queue = queue;
2005
2006         ring->ring_obj = NULL;
2007         ring->use_doorbell = true;
2008         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2009         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2010                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2011         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2012
2013         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2014                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2015                 + ring->pipe;
2016
2017         /* type-2 packets are deprecated on MEC, use type-3 instead */
2018         r = amdgpu_ring_init(adev, ring, 1024,
2019                              &adev->gfx.eop_irq, irq_type);
2020         if (r)
2021                 return r;
2022
2024         return 0;
2025 }
2026
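/*
 * sw_init: register the GRBM_CP interrupt sources, load all gfx
 * microcode, create the RLC and MEC objects, then instantiate the gfx
 * ring, the compute rings (allocated horizontally across pipes), the
 * KIQ ring and the per-queue MQD backing store.
 */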
2027 static int gfx_v9_0_sw_init(void *handle)
2028 {
2029         int i, j, k, r, ring_id;
2030         struct amdgpu_ring *ring;
2031         struct amdgpu_kiq *kiq;
2032         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2033
2034         switch (adev->asic_type) {
2035         case CHIP_VEGA10:
2036         case CHIP_VEGA12:
2037         case CHIP_VEGA20:
2038         case CHIP_RAVEN:
2039         case CHIP_ARCTURUS:
2040         case CHIP_RENOIR:
2041                 adev->gfx.mec.num_mec = 2;
2042                 break;
2043         default:
2044                 adev->gfx.mec.num_mec = 1;
2045                 break;
2046         }
2047
2048         adev->gfx.mec.num_pipe_per_mec = 4;
2049         adev->gfx.mec.num_queue_per_pipe = 8;
2050
2051         /* EOP Event */
2052         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2053         if (r)
2054                 return r;
2055
2056         /* Privileged reg */
2057         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2058                               &adev->gfx.priv_reg_irq);
2059         if (r)
2060                 return r;
2061
2062         /* Privileged inst */
2063         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2064                               &adev->gfx.priv_inst_irq);
2065         if (r)
2066                 return r;
2067
2068         /* ECC error */
2069         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2070                               &adev->gfx.cp_ecc_error_irq);
2071         if (r)
2072                 return r;
2073
2074         /* FUE error */
2075         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2076                               &adev->gfx.cp_ecc_error_irq);
2077         if (r)
2078                 return r;
2079
2080         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2081
2082         gfx_v9_0_scratch_init(adev);
2083
2084         r = gfx_v9_0_init_microcode(adev);
2085         if (r) {
2086                 DRM_ERROR("Failed to load gfx firmware!\n");
2087                 return r;
2088         }
2089
2090         r = adev->gfx.rlc.funcs->init(adev);
2091         if (r) {
2092                 DRM_ERROR("Failed to init rlc BOs!\n");
2093                 return r;
2094         }
2095
2096         r = gfx_v9_0_mec_init(adev);
2097         if (r) {
2098                 DRM_ERROR("Failed to init MEC BOs!\n");
2099                 return r;
2100         }
2101
2102         /* set up the gfx ring */
2103         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2104                 ring = &adev->gfx.gfx_ring[i];
2105                 ring->ring_obj = NULL;
2106                 if (!i)
2107                         sprintf(ring->name, "gfx");
2108                 else
2109                         sprintf(ring->name, "gfx_%d", i);
2110                 ring->use_doorbell = true;
2111                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2112                 r = amdgpu_ring_init(adev, ring, 1024,
2113                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2114                 if (r)
2115                         return r;
2116         }
2117
2118         /* set up the compute queues - allocate horizontally across pipes */
2119         ring_id = 0;
2120         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2121                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2122                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2123                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2124                                         continue;
2125
2126                                 r = gfx_v9_0_compute_ring_init(adev,
2127                                                                ring_id,
2128                                                                i, k, j);
2129                                 if (r)
2130                                         return r;
2131
2132                                 ring_id++;
2133                         }
2134                 }
2135         }
2136
2137         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2138         if (r) {
2139                 DRM_ERROR("Failed to init KIQ BOs!\n");
2140                 return r;
2141         }
2142
2143         kiq = &adev->gfx.kiq;
2144         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2145         if (r)
2146                 return r;
2147
        /* create MQD for all compute queues as well as KIQ for SRIOV case */
2149         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2150         if (r)
2151                 return r;
2152
2153         adev->gfx.ce_ram_size = 0x8000;
2154
2155         r = gfx_v9_0_gpu_early_init(adev);
2156         if (r)
2157                 return r;
2158
2159         return 0;
2160 }
2161
2163 static int gfx_v9_0_sw_fini(void *handle)
2164 {
2165         int i;
2166         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2167
2168         amdgpu_gfx_ras_fini(adev);
2169
2170         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2171                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2172         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2173                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2174
2175         amdgpu_gfx_mqd_sw_fini(adev);
2176         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2177         amdgpu_gfx_kiq_fini(adev);
2178
2179         gfx_v9_0_mec_fini(adev);
2180         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2181         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2182                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2183                                 &adev->gfx.rlc.cp_table_gpu_addr,
2184                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2185         }
2186         gfx_v9_0_free_microcode(adev);
2187
2188         return 0;
2189 }
2190
2192 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2193 {
2194         /* TODO */
2195 }
2196
2197 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2198 {
2199         u32 data;
2200
2201         if (instance == 0xffffffff)
2202                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2203         else
2204                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2205
2206         if (se_num == 0xffffffff)
2207                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2208         else
2209                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2210
2211         if (sh_num == 0xffffffff)
2212                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2213         else
2214                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2215
2216         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2217 }
2218
2219 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2220 {
2221         u32 data, mask;
2222
2223         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2224         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2225
2226         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2227         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2228
2229         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2230                                          adev->gfx.config.max_sh_per_se);
2231
2232         return (~data) & mask;
2233 }
2234
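/*
 * Walk every SE/SH, collect the bitmap of render backends that survived
 * harvesting, and cache the resulting enable mask and RB count in the
 * gfx config.
 */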
2235 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2236 {
2237         int i, j;
2238         u32 data;
2239         u32 active_rbs = 0;
2240         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2241                                         adev->gfx.config.max_sh_per_se;
2242
2243         mutex_lock(&adev->grbm_idx_mutex);
2244         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2245                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2246                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2247                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2248                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2249                                                rb_bitmap_width_per_sh);
2250                 }
2251         }
2252         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2253         mutex_unlock(&adev->grbm_idx_mutex);
2254
2255         adev->gfx.config.backend_enable_mask = active_rbs;
2256         adev->gfx.config.num_rbs = hweight32(active_rbs);
2257 }
2258
2259 #define DEFAULT_SH_MEM_BASES    (0x6000)
2260 #define FIRST_COMPUTE_VMID      (8)
2261 #define LAST_COMPUTE_VMID       (16)
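/* VMIDs 8..15 are carved out for compute queues (e.g. for the KFD). */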
2262 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2263 {
2264         int i;
2265         uint32_t sh_mem_config;
2266         uint32_t sh_mem_bases;
2267
2268         /*
2269          * Configure apertures:
2270          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2271          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2272          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2273          */
2274         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2275
2276         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2277                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2278                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2279
2280         mutex_lock(&adev->srbm_mutex);
2281         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2282                 soc15_grbm_select(adev, 0, 0, 0, i);
2283                 /* CP and shaders */
2284                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2285                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2286         }
2287         soc15_grbm_select(adev, 0, 0, 0, 0);
2288         mutex_unlock(&adev->srbm_mutex);
2289
2290         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2291          * access. These should be enabled by FW for target VMIDs. */
2292         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2293                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2294                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2295                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2296                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2297         }
2298 }
2299
2300 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2301 {
2302         int vmid;
2303
2304         /*
2305          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2306          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2307          * the driver can enable them for graphics. VMID0 should maintain
2308          * access so that HWS firmware can save/restore entries.
2309          */
2310         for (vmid = 1; vmid < 16; vmid++) {
2311                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2312                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2313                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2314                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2315         }
2316 }
2317
2318 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2319 {
2320         u32 tmp;
2321         int i;
2322
2323         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2324
2325         gfx_v9_0_tiling_mode_table_init(adev);
2326
2327         gfx_v9_0_setup_rb(adev);
2328         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2329         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2330
2331         /* XXX SH_MEM regs */
2332         /* where to put LDS, scratch, GPUVM in FSA64 space */
2333         mutex_lock(&adev->srbm_mutex);
2334         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2335                 soc15_grbm_select(adev, 0, 0, 0, i);
2336                 /* CP and shaders */
2337                 if (i == 0) {
2338                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2339                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2340                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2341                                             !!amdgpu_noretry);
2342                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2343                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2344                 } else {
2345                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2346                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2347                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2348                                             !!amdgpu_noretry);
2349                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2350                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2351                                 (adev->gmc.private_aperture_start >> 48));
2352                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2353                                 (adev->gmc.shared_aperture_start >> 48));
2354                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2355                 }
2356         }
2357         soc15_grbm_select(adev, 0, 0, 0, 0);
2358
2359         mutex_unlock(&adev->srbm_mutex);
2360
2361         gfx_v9_0_init_compute_vmid(adev);
2362         gfx_v9_0_init_gds_vmid(adev);
2363 }
2364
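/*
 * Poll until the RLC serdes masters report idle: first the per-CU masters
 * on every SE/SH, then the non-CU (SE/GC/TC) masters, each poll bounded by
 * adev->usec_timeout microseconds.
 */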
2365 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2366 {
2367         u32 i, j, k;
2368         u32 mask;
2369
2370         mutex_lock(&adev->grbm_idx_mutex);
2371         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2372                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2373                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2374                         for (k = 0; k < adev->usec_timeout; k++) {
2375                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2376                                         break;
2377                                 udelay(1);
2378                         }
2379                         if (k == adev->usec_timeout) {
2380                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2381                                                       0xffffffff, 0xffffffff);
2382                                 mutex_unlock(&adev->grbm_idx_mutex);
2383                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2384                                          i, j);
2385                                 return;
2386                         }
2387                 }
2388         }
2389         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2390         mutex_unlock(&adev->grbm_idx_mutex);
2391
2392         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2393                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2394                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2395                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2396         for (k = 0; k < adev->usec_timeout; k++) {
2397                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2398                         break;
2399                 udelay(1);
2400         }
2401 }
2402
2403 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2404                                                bool enable)
2405 {
2406         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2407
2408         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2409         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2410         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2411         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2412
2413         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2414 }
2415
2416 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2417 {
2418         /* csib */
2419         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2420                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2421         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2422                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2423         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2424                         adev->gfx.rlc.clear_state_size);
2425 }
2426
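/*
 * Scan the RLC register-list-format blob from 'indirect_offset' onward:
 * record the offset at which each indirect sub-list starts and collect the
 * set of unique indirect register offsets it references. Each sub-list is
 * terminated by a 0xFFFFFFFF marker.
 */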
2427 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2428                                 int indirect_offset,
2429                                 int list_size,
2430                                 int *unique_indirect_regs,
2431                                 int unique_indirect_reg_count,
2432                                 int *indirect_start_offsets,
2433                                 int *indirect_start_offsets_count,
2434                                 int max_start_offsets_count)
2435 {
2436         int idx;
2437
2438         for (; indirect_offset < list_size; indirect_offset++) {
2439                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2440                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2441                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2442
2443                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2444                         indirect_offset += 2;
2445
2446                         /* look for the matching index */
2447                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2448                                 if (unique_indirect_regs[idx] ==
2449                                         register_list_format[indirect_offset] ||
2450                                         !unique_indirect_regs[idx])
2451                                         break;
2452                         }
2453
2454                         BUG_ON(idx >= unique_indirect_reg_count);
2455
2456                         if (!unique_indirect_regs[idx])
2457                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2458
2459                         indirect_offset++;
2460                 }
2461         }
2462 }
2463
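/*
 * Program the RLC save/restore list used by gfxoff: the direct register
 * portion goes into SRM ARAM, the format list and the per-sub-list starting
 * offsets go into GPM scratch, and each unique indirect register is bound
 * to one of the RLC_SRM_INDEX_CNTL_ADDR/DATA slots.
 */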
2464 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2465 {
2466         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2467         int unique_indirect_reg_count = 0;
2468
2469         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2470         int indirect_start_offsets_count = 0;
2471
2472         int list_size = 0;
2473         int i = 0, j = 0;
2474         u32 tmp = 0;
2475
2476         u32 *register_list_format =
2477                 kmemdup(adev->gfx.rlc.register_list_format,
2478                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2479         if (!register_list_format)
2480                 return -ENOMEM;
2481
2482         /* setup unique_indirect_regs array and indirect_start_offsets array */
2483         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2484         gfx_v9_1_parse_ind_reg_list(register_list_format,
2485                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2486                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2487                                     unique_indirect_regs,
2488                                     unique_indirect_reg_count,
2489                                     indirect_start_offsets,
2490                                     &indirect_start_offsets_count,
2491                                     ARRAY_SIZE(indirect_start_offsets));
2492
2493         /* enable auto inc in case it is disabled */
2494         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2495         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2496         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2497
2498         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2499         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2500                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2501         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2502                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2503                         adev->gfx.rlc.register_restore[i]);
2504
2505         /* load indirect register */
2506         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2507                 adev->gfx.rlc.reg_list_format_start);
2508
2509         /* direct register portion */
2510         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2511                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2512                         register_list_format[i]);
2513
2514         /* indirect register portion */
2515         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2516                 if (register_list_format[i] == 0xFFFFFFFF) {
2517                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2518                         continue;
2519                 }
2520
2521                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2522                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2523
2524                 for (j = 0; j < unique_indirect_reg_count; j++) {
2525                         if (register_list_format[i] == unique_indirect_regs[j]) {
2526                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2527                                 break;
2528                         }
2529                 }
2530
2531                 BUG_ON(j >= unique_indirect_reg_count);
2532
2533                 i++;
2534         }
2535
2536         /* set save/restore list size */
2537         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2538         list_size = list_size >> 1;
2539         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2540                 adev->gfx.rlc.reg_restore_list_size);
2541         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2542
2543         /* write the starting offsets to RLC scratch ram */
2544         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2545                 adev->gfx.rlc.starting_offsets_start);
2546         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2547                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2548                        indirect_start_offsets[i]);
2549
2550         /* load unique indirect regs */
2551         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2552                 if (unique_indirect_regs[i] != 0) {
2553                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2554                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2555                                unique_indirect_regs[i] & 0x3FFFF);
2556
2557                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2558                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2559                                unique_indirect_regs[i] >> 20);
2560                 }
2561         }
2562
2563         kfree(register_list_format);
2564         return 0;
2565 }
2566
2567 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2568 {
2569         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2570 }
2571
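/*
 * Hand control of coarse-grain power gating (CGPG) to the GFX IP and update
 * the GFXOFF status field in PWR_MISC_CNTL_STATUS, or give that control
 * back when 'enable' is false. The register is only written when the value
 * actually changes.
 */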
2572 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2573                                              bool enable)
2574 {
2575         uint32_t data = 0;
2576         uint32_t default_data = 0;
2577
2578         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2579         if (enable) {
2580                 /* enable GFXIP control over CGPG */
2581                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2582                 if (default_data != data)
2583                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2584
2585                 /* update status */
2586                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2587                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2588                 if (default_data != data)
2589                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2590         } else {
2591                 /* restore GFXIP control over CGPG */
2592                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2593                 if (default_data != data)
2594                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2595         }
2596 }
2597
2598 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2599 {
2600         uint32_t data = 0;
2601
2602         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2603                               AMD_PG_SUPPORT_GFX_SMG |
2604                               AMD_PG_SUPPORT_GFX_DMG)) {
2605                 /* init IDLE_POLL_COUNT = 0x60 */
2606                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2607                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2608                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2609                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2610
2611                 /* init RLC PG Delay */
2612                 data = 0;
2613                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2614                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2615                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2616                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2617                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2618
2619                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2620                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2621                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2622                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2623
2624                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2625                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2626                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2627                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2628
2629                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2630                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2631
2632                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2633                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2634                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2635
2636                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2637         }
2638 }
2639
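/*
 * Each helper below toggles one feature bit in RLC_PG_CNTL via
 * read-modify-write, skipping the register write when nothing changes:
 * SMU clock slowdown on power up/down, CP power gating, coarse-grain GFX
 * power gating, GFX pipeline power gating, and static/dynamic per-CU
 * power gating.
 */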
2640 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2641                                                 bool enable)
2642 {
2643         uint32_t data = 0;
2644         uint32_t default_data = 0;
2645
2646         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2647         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2648                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2649                              enable ? 1 : 0);
2650         if (default_data != data)
2651                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2652 }
2653
2654 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2655                                                 bool enable)
2656 {
2657         uint32_t data = 0;
2658         uint32_t default_data = 0;
2659
2660         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2661         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2662                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2663                              enable ? 1 : 0);
2664         if (default_data != data)
2665                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2666 }
2667
2668 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2669                                         bool enable)
2670 {
2671         uint32_t data = 0;
2672         uint32_t default_data = 0;
2673
2674         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2675         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2676                              CP_PG_DISABLE,
2677                              enable ? 0 : 1);
2678         if (default_data != data)
2679                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2680 }
2681
2682 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2683                                                 bool enable)
2684 {
2685         uint32_t data, default_data;
2686
2687         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2688         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2689                              GFX_POWER_GATING_ENABLE,
2690                              enable ? 1 : 0);
2691         if (default_data != data)
2692                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2693 }
2694
2695 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2696                                                 bool enable)
2697 {
2698         uint32_t data, default_data;
2699
2700         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2701         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2702                              GFX_PIPELINE_PG_ENABLE,
2703                              enable ? 1 : 0);
2704         if (default_data != data)
2705                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2706
2707         if (!enable)
2708                 /* read any GFX register to wake up GFX */
2709                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2710 }
2711
2712 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2713                                                        bool enable)
2714 {
2715         uint32_t data, default_data;
2716
2717         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2718         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2719                              STATIC_PER_CU_PG_ENABLE,
2720                              enable ? 1 : 0);
2721         if (default_data != data)
2722                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2723 }
2724
2725 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2726                                                 bool enable)
2727 {
2728         uint32_t data, default_data;
2729
2730         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2731         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2732                              DYN_PER_CU_PG_ENABLE,
2733                              enable ? 1 : 0);
2734         if (default_data != data)
2735                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2736 }
2737
2738 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2739 {
2740         gfx_v9_0_init_csb(adev);
2741
2742         /*
2743          * The RLC save/restore list is only available from RLC v2_1,
2744          * and it is required by the gfxoff feature.
2745          */
2746         if (adev->gfx.rlc.is_rlc_v2_1) {
2747                 if (adev->asic_type == CHIP_VEGA12 ||
2748                     (adev->asic_type == CHIP_RAVEN &&
2749                      adev->rev_id >= 8))
2750                         gfx_v9_1_init_rlc_save_restore_list(adev);
2751                 gfx_v9_0_enable_save_restore_machine(adev);
2752         }
2753
2754         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2755                               AMD_PG_SUPPORT_GFX_SMG |
2756                               AMD_PG_SUPPORT_GFX_DMG |
2757                               AMD_PG_SUPPORT_CP |
2758                               AMD_PG_SUPPORT_GDS |
2759                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2760                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2761                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2762                 gfx_v9_0_init_gfx_power_gating(adev);
2763         }
2764 }
2765
2766 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2767 {
2768         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2769         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2770         gfx_v9_0_wait_for_rlc_serdes(adev);
2771 }
2772
2773 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2774 {
2775         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2776         udelay(50);
2777         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2778         udelay(50);
2779 }
2780
2781 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2782 {
2783 #ifdef AMDGPU_RLC_DEBUG_RETRY
2784         u32 rlc_ucode_ver;
2785 #endif
2786
2787         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2788         udelay(50);
2789
2790         /* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
2791         if (!(adev->flags & AMD_IS_APU)) {
2792                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2793                 udelay(50);
2794         }
2795
2796 #ifdef AMDGPU_RLC_DEBUG_RETRY
2797         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2798         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2799         if (rlc_ucode_ver == 0x108) {
2800                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2801                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2802                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2803                  * default is 0x9C4 to create a 100us interval */
2804                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2805                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2806                  * to disable the page fault retry interrupts, default is
2807                  * 0x100 (256) */
2808                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2809         }
2810 #endif
2811 }
2812
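/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode words through
 * RLC_GPM_UCODE_ADDR/DATA starting at the RLCG load address, then write the
 * firmware version into UCODE_ADDR, matching the other legacy load paths in
 * this file.
 */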
2813 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2814 {
2815         const struct rlc_firmware_header_v2_0 *hdr;
2816         const __le32 *fw_data;
2817         unsigned i, fw_size;
2818
2819         if (!adev->gfx.rlc_fw)
2820                 return -EINVAL;
2821
2822         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2823         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2824
2825         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2826                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2827         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2828
2829         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2830                         RLCG_UCODE_LOADING_START_ADDRESS);
2831         for (i = 0; i < fw_size; i++)
2832                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2833         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2834
2835         return 0;
2836 }
2837
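/*
 * Bring the RLC back up. SR-IOV guests only refresh the CSB; everything
 * else stops the RLC, disables coarse-grain clock gating, re-initializes
 * power gating and the CSB, loads the microcode when PSP is not doing the
 * loading, applies the per-chip LBPW policy, and restarts the RLC.
 */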
2838 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2839 {
2840         int r;
2841
2842         if (amdgpu_sriov_vf(adev)) {
2843                 gfx_v9_0_init_csb(adev);
2844                 return 0;
2845         }
2846
2847         adev->gfx.rlc.funcs->stop(adev);
2848
2849         /* disable CG */
2850         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2851
2852         gfx_v9_0_init_pg(adev);
2853
2854         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2855                 /* legacy rlc firmware loading */
2856                 r = gfx_v9_0_rlc_load_microcode(adev);
2857                 if (r)
2858                         return r;
2859         }
2860
2861         switch (adev->asic_type) {
2862         case CHIP_RAVEN:
2863                 if (amdgpu_lbpw == 0)
2864                         gfx_v9_0_enable_lbpw(adev, false);
2865                 else
2866                         gfx_v9_0_enable_lbpw(adev, true);
2867                 break;
2868         case CHIP_VEGA20:
2869                 if (amdgpu_lbpw > 0)
2870                         gfx_v9_0_enable_lbpw(adev, true);
2871                 else
2872                         gfx_v9_0_enable_lbpw(adev, false);
2873                 break;
2874         default:
2875                 break;
2876         }
2877
2878         adev->gfx.rlc.funcs->start(adev);
2879
2880         return 0;
2881 }
2882
2883 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2884 {
2885         int i;
2886         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2887
2888         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2889         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2890         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2891         if (!enable) {
2892                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2893                         adev->gfx.gfx_ring[i].sched.ready = false;
2894         }
2895         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2896         udelay(50);
2897 }
2898
2899 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2900 {
2901         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2902         const struct gfx_firmware_header_v1_0 *ce_hdr;
2903         const struct gfx_firmware_header_v1_0 *me_hdr;
2904         const __le32 *fw_data;
2905         unsigned i, fw_size;
2906
2907         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2908                 return -EINVAL;
2909
2910         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2911                 adev->gfx.pfp_fw->data;
2912         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2913                 adev->gfx.ce_fw->data;
2914         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2915                 adev->gfx.me_fw->data;
2916
2917         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2918         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2919         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2920
2921         gfx_v9_0_cp_gfx_enable(adev, false);
2922
2923         /* PFP */
2924         fw_data = (const __le32 *)
2925                 (adev->gfx.pfp_fw->data +
2926                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2927         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2928         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2929         for (i = 0; i < fw_size; i++)
2930                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2931         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2932
2933         /* CE */
2934         fw_data = (const __le32 *)
2935                 (adev->gfx.ce_fw->data +
2936                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2937         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2938         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2939         for (i = 0; i < fw_size; i++)
2940                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2941         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2942
2943         /* ME */
2944         fw_data = (const __le32 *)
2945                 (adev->gfx.me_fw->data +
2946                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2947         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2948         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2949         for (i = 0; i < fw_size; i++)
2950                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2951         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2952
2953         return 0;
2954 }
2955
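/*
 * Bring up the gfx CP: program the context count and device id, un-halt the
 * micro engines, and submit the golden clear state (PREAMBLE +
 * SET_CONTEXT_REG extents + CLEAR_STATE) plus the CE partition bases
 * through gfx ring 0.
 */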
2956 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2957 {
2958         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2959         const struct cs_section_def *sect = NULL;
2960         const struct cs_extent_def *ext = NULL;
2961         int r, i, tmp;
2962
2963         /* init the CP */
2964         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2965         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2966
2967         gfx_v9_0_cp_gfx_enable(adev, true);
2968
2969         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2970         if (r) {
2971                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2972                 return r;
2973         }
2974
2975         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2976         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2977
2978         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2979         amdgpu_ring_write(ring, 0x80000000);
2980         amdgpu_ring_write(ring, 0x80000000);
2981
2982         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2983                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2984                         if (sect->id == SECT_CONTEXT) {
2985                                 amdgpu_ring_write(ring,
2986                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2987                                                ext->reg_count));
2988                                 amdgpu_ring_write(ring,
2989                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2990                                 for (i = 0; i < ext->reg_count; i++)
2991                                         amdgpu_ring_write(ring, ext->extent[i]);
2992                         }
2993                 }
2994         }
2995
2996         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2997         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2998
2999         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3000         amdgpu_ring_write(ring, 0);
3001
3002         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3003         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3004         amdgpu_ring_write(ring, 0x8000);
3005         amdgpu_ring_write(ring, 0x8000);
3006
3007         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3008         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3009                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3010         amdgpu_ring_write(ring, tmp);
3011         amdgpu_ring_write(ring, 0);
3012
3013         amdgpu_ring_commit(ring);
3014
3015         return 0;
3016 }
3017
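/*
 * (Re)program the gfx ring 0 hardware state from the amdgpu_ring: ring
 * buffer size and base, rptr/wptr write-back addresses, doorbell control
 * and range, then start the CP and mark the ring schedulable.
 */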
3018 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3019 {
3020         struct amdgpu_ring *ring;
3021         u32 tmp;
3022         u32 rb_bufsz;
3023         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3024
3025         /* Set the write pointer delay */
3026         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3027
3028         /* set the RB to use vmid 0 */
3029         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3030
3031         /* Set ring buffer size */
3032         ring = &adev->gfx.gfx_ring[0];
3033         rb_bufsz = order_base_2(ring->ring_size / 8);
3034         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3035         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3036 #ifdef __BIG_ENDIAN
3037         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3038 #endif
3039         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3040
3041         /* Initialize the ring buffer's write pointers */
3042         ring->wptr = 0;
3043         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3044         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3045
3046         /* set the wb address whether it's enabled or not */
3047         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3048         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3049         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3050
3051         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3052         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3053         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3054
3055         mdelay(1);
3056         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3057
3058         rb_addr = ring->gpu_addr >> 8;
3059         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3060         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3061
3062         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3063         if (ring->use_doorbell) {
3064                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3065                                     DOORBELL_OFFSET, ring->doorbell_index);
3066                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3067                                     DOORBELL_EN, 1);
3068         } else {
3069                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3070         }
3071         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3072
3073         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3074                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3075         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3076
3077         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3078                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3079
3080
3081         /* start the ring */
3082         gfx_v9_0_cp_gfx_start(adev);
3083         ring->sched.ready = true;
3084
3085         return 0;
3086 }
3087
3088 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3089 {
3090         int i;
3091
3092         if (enable) {
3093                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3094         } else {
3095                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3096                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3097                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3098                         adev->gfx.compute_ring[i].sched.ready = false;
3099                 adev->gfx.kiq.ring.sched.ready = false;
3100         }
3101         udelay(50);
3102 }
3103
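/*
 * Legacy (non-PSP) MEC1 microcode load: point the CPC instruction cache at
 * the firmware BO, then write the jump table through ME1_UCODE_ADDR/DATA
 * and finish with the firmware version.
 */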
3104 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3105 {
3106         const struct gfx_firmware_header_v1_0 *mec_hdr;
3107         const __le32 *fw_data;
3108         unsigned i;
3109         u32 tmp;
3110
3111         if (!adev->gfx.mec_fw)
3112                 return -EINVAL;
3113
3114         gfx_v9_0_cp_compute_enable(adev, false);
3115
3116         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3117         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3118
3119         fw_data = (const __le32 *)
3120                 (adev->gfx.mec_fw->data +
3121                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3122         tmp = 0;
3123         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3124         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3125         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3126
3127         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3128                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3129         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3130                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3131
3132         /* MEC1 */
3133         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3134                          mec_hdr->jt_offset);
3135         for (i = 0; i < mec_hdr->jt_size; i++)
3136                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3137                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3138
3139         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3140                         adev->gfx.mec_fw_version);
3141         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3142
3143         return 0;
3144 }
3145
3146 /* KIQ functions */
3147 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3148 {
3149         uint32_t tmp;
3150         struct amdgpu_device *adev = ring->adev;
3151
3152         /* tell RLC which is KIQ queue */
3153         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3154         tmp &= 0xffffff00;
3155         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3156         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3157         tmp |= 0x80;
3158         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3159 }
3160
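/*
 * Use the KIQ to make the scheduler firmware aware of all compute rings:
 * one SET_RESOURCES packet publishing the queue mask, then one MAP_QUEUES
 * packet per compute ring pointing at its MQD and wptr write-back address,
 * followed by a KIQ ring test to verify the packets were consumed.
 */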
3161 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3162 {
3163         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3164         uint64_t queue_mask = 0;
3165         int r, i;
3166
3167         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3168                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3169                         continue;
3170
3171                 /* This situation may be hit in the future if a new HW
3172                  * generation exposes more than 64 queues. If so, the
3173                  * definition of queue_mask needs updating */
3174                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3175                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3176                         break;
3177                 }
3178
3179                 queue_mask |= (1ull << i);
3180         }
3181
3182         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3183         if (r) {
3184                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3185                 return r;
3186         }
3187
3188         /* set resources */
3189         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3190         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3191                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3192         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3193         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3194         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3195         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3196         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3197         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3198         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3199                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3200                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3201                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3202
3203                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3204                 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
3205                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3206                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3207                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3208                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3209                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3210                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3211                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3212                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3213                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3214                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3215                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3216                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3217                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3218                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3219                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3220         }
3221
3222         r = amdgpu_ring_test_helper(kiq_ring);
3223         if (r)
3224                 DRM_ERROR("KCQ enable failed\n");
3225
3226         return r;
3227 }
3228
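/*
 * Fill the v9 MQD (memory queue descriptor) for this ring in host memory:
 * EOP buffer, doorbell, MQD/HQD base addresses and PQ control values. The
 * values are committed to the HQD registers later, either directly (KIQ)
 * or via the KIQ MAP_QUEUES packet (regular compute queues).
 */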
3229 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3230 {
3231         struct amdgpu_device *adev = ring->adev;
3232         struct v9_mqd *mqd = ring->mqd_ptr;
3233         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3234         uint32_t tmp;
3235
3236         mqd->header = 0xC0310800;
3237         mqd->compute_pipelinestat_enable = 0x00000001;
3238         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3239         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3240         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3241         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3242         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3243         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3244         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3245         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3246         mqd->compute_misc_reserved = 0x00000003;
3247
3248         mqd->dynamic_cu_mask_addr_lo =
3249                 lower_32_bits(ring->mqd_gpu_addr
3250                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3251         mqd->dynamic_cu_mask_addr_hi =
3252                 upper_32_bits(ring->mqd_gpu_addr
3253                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3254
3255         eop_base_addr = ring->eop_gpu_addr >> 8;
3256         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3257         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3258
3259         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3260         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3261         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3262                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3263
3264         mqd->cp_hqd_eop_control = tmp;
3265
3266         /* enable doorbell? */
3267         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3268
3269         if (ring->use_doorbell) {
3270                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3271                                     DOORBELL_OFFSET, ring->doorbell_index);
3272                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3273                                     DOORBELL_EN, 1);
3274                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3275                                     DOORBELL_SOURCE, 0);
3276                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3277                                     DOORBELL_HIT, 0);
3278         } else {
3279                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3280                                          DOORBELL_EN, 0);
3281         }
3282
3283         mqd->cp_hqd_pq_doorbell_control = tmp;
3284
3285         /* disable the queue if it's active */
3286         ring->wptr = 0;
3287         mqd->cp_hqd_dequeue_request = 0;
3288         mqd->cp_hqd_pq_rptr = 0;
3289         mqd->cp_hqd_pq_wptr_lo = 0;
3290         mqd->cp_hqd_pq_wptr_hi = 0;
3291
3292         /* set the pointer to the MQD */
3293         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3294         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3295
3296         /* set MQD vmid to 0 */
3297         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3298         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3299         mqd->cp_mqd_control = tmp;
3300
3301         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3302         hqd_gpu_addr = ring->gpu_addr >> 8;
3303         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3304         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3305
3306         /* set up the HQD, this is similar to CP_RB0_CNTL */
3307         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3308         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3309                             (order_base_2(ring->ring_size / 4) - 1));
3310         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3311                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3312 #ifdef __BIG_ENDIAN
3313         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3314 #endif
3315         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3316         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3317         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3318         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3319         mqd->cp_hqd_pq_control = tmp;
3320
3321         /* set the wb address whether it's enabled or not */
3322         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3323         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3324         mqd->cp_hqd_pq_rptr_report_addr_hi =
3325                 upper_32_bits(wb_gpu_addr) & 0xffff;
3326
3327         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3328         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3329         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3330         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3331
3332         tmp = 0;
3333         /* enable the doorbell if requested */
3334         if (ring->use_doorbell) {
3335                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3336                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3337                                 DOORBELL_OFFSET, ring->doorbell_index);
3338
3339                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3340                                          DOORBELL_EN, 1);
3341                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3342                                          DOORBELL_SOURCE, 0);
3343                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3344                                          DOORBELL_HIT, 0);
3345         }
3346
3347         mqd->cp_hqd_pq_doorbell_control = tmp;
3348
3349         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3350         ring->wptr = 0;
3351         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3352
3353         /* set the vmid for the queue */
3354         mqd->cp_hqd_vmid = 0;
3355
3356         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3357         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3358         mqd->cp_hqd_persistent_state = tmp;
3359
3360         /* set MIN_IB_AVAIL_SIZE */
3361         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3362         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3363         mqd->cp_hqd_ib_control = tmp;
3364
3365         /* activate the queue */
3366         mqd->cp_hqd_active = 1;
3367
3368         return 0;
3369 }
3370
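/*
 * Commit the KIQ ring's MQD contents straight into the HQD registers. Only
 * the KIQ is programmed this way; regular compute queues are mapped by the
 * KIQ itself via MAP_QUEUES.
 */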
3371 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3372 {
3373         struct amdgpu_device *adev = ring->adev;
3374         struct v9_mqd *mqd = ring->mqd_ptr;
3375         int j;
3376
3377         /* disable wptr polling */
3378         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3379
3380         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3381                mqd->cp_hqd_eop_base_addr_lo);
3382         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3383                mqd->cp_hqd_eop_base_addr_hi);
3384
3385         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3386         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3387                mqd->cp_hqd_eop_control);
3388
3389         /* enable doorbell? */
3390         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3391                mqd->cp_hqd_pq_doorbell_control);
3392
3393         /* disable the queue if it's active */
3394         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3395                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3396                 for (j = 0; j < adev->usec_timeout; j++) {
3397                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3398                                 break;
3399                         udelay(1);
3400                 }
3401                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3402                        mqd->cp_hqd_dequeue_request);
3403                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3404                        mqd->cp_hqd_pq_rptr);
3405                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3406                        mqd->cp_hqd_pq_wptr_lo);
3407                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3408                        mqd->cp_hqd_pq_wptr_hi);
3409         }
3410
3411         /* set the pointer to the MQD */
3412         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3413                mqd->cp_mqd_base_addr_lo);
3414         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3415                mqd->cp_mqd_base_addr_hi);
3416
3417         /* set MQD vmid to 0 */
3418         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3419                mqd->cp_mqd_control);
3420
3421         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3422         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3423                mqd->cp_hqd_pq_base_lo);
3424         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3425                mqd->cp_hqd_pq_base_hi);
3426
3427         /* set up the HQD, this is similar to CP_RB0_CNTL */
3428         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3429                mqd->cp_hqd_pq_control);
3430
3431         /* set the wb address whether it's enabled or not */
3432         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3433                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3434         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3435                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3436
3437         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3438         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3439                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3440         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3441                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3442
3443         /* enable the doorbell if requested */
3444         if (ring->use_doorbell) {
3445                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3446                                         (adev->doorbell_index.kiq * 2) << 2);
3447                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3448                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3449         }
3450
3451         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3452                mqd->cp_hqd_pq_doorbell_control);
3453
3454         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3455         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3456                mqd->cp_hqd_pq_wptr_lo);
3457         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3458                mqd->cp_hqd_pq_wptr_hi);
3459
3460         /* set the vmid for the queue */
3461         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3462
3463         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3464                mqd->cp_hqd_persistent_state);
3465
3466         /* activate the queue */
3467         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3468                mqd->cp_hqd_active);
3469
3470         if (ring->use_doorbell)
3471                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3472
3473         return 0;
3474 }
3475
3476 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3477 {
3478         struct amdgpu_device *adev = ring->adev;
3479         int j;
3480
3481         /* disable the queue if it's active */
3482         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3483
3484                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3485
3486                 for (j = 0; j < adev->usec_timeout; j++) {
3487                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3488                                 break;
3489                         udelay(1);
3490                 }
3491
3492                 if (j == adev->usec_timeout) {
3493                         DRM_DEBUG("KIQ dequeue request failed.\n");
3494
3495                         /* Manual disable if dequeue request times out */
3496                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3497                 }
3498
3499                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3500                       0);
3501         }
3502
3503         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3504         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3505         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3506         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3507         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3508         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3509         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3510         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3511
3512         return 0;
3513 }
3514
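/*
 * Initialize the KIQ queue. On first init the MQD is built from scratch
 * (with all CUs/RBs enabled in the dynamic masks) and a backup copy is
 * saved; on GPU reset the saved MQD is restored and only the HQD registers
 * are reprogrammed, so the queue comes back in the same state.
 */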
3515 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3516 {
3517         struct amdgpu_device *adev = ring->adev;
3518         struct v9_mqd *mqd = ring->mqd_ptr;
3519         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3520
3521         gfx_v9_0_kiq_setting(ring);
3522
3523         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3524                 /* reset MQD to a clean status */
3525                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3526                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3527
3528                 /* reset ring buffer */
3529                 ring->wptr = 0;
3530                 amdgpu_ring_clear_ring(ring);
3531
3532                 mutex_lock(&adev->srbm_mutex);
3533                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3534                 gfx_v9_0_kiq_init_register(ring);
3535                 soc15_grbm_select(adev, 0, 0, 0, 0);
3536                 mutex_unlock(&adev->srbm_mutex);
3537         } else {
3538                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3539                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3540                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3541                 mutex_lock(&adev->srbm_mutex);
3542                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3543                 gfx_v9_0_mqd_init(ring);
3544                 gfx_v9_0_kiq_init_register(ring);
3545                 soc15_grbm_select(adev, 0, 0, 0, 0);
3546                 mutex_unlock(&adev->srbm_mutex);
3547
3548                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3549                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3550         }
3551
3552         return 0;
3553 }
3554
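/*
 * Initialize a KCQ (kernel compute queue). Unlike the KIQ above, the HQD
 * registers are not written directly here; the queues are mapped onto the
 * hardware later via KIQ packets (see gfx_v9_0_kiq_kcq_enable), so only
 * the MQD itself is prepared and backed up.
 */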
3555 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3556 {
3557         struct amdgpu_device *adev = ring->adev;
3558         struct v9_mqd *mqd = ring->mqd_ptr;
3559         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3560
3561         if (!adev->in_gpu_reset && !adev->in_suspend) {
3562                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3563                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3564                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3565                 mutex_lock(&adev->srbm_mutex);
3566                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3567                 gfx_v9_0_mqd_init(ring);
3568                 soc15_grbm_select(adev, 0, 0, 0, 0);
3569                 mutex_unlock(&adev->srbm_mutex);
3570
3571                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3572                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3573         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3574                 /* reset MQD to a clean status */
3575                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3576                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3577
3578                 /* reset ring buffer */
3579                 ring->wptr = 0;
3580                 amdgpu_ring_clear_ring(ring);
3581         } else {
3582                 amdgpu_ring_clear_ring(ring);
3583         }
3584
3585         return 0;
3586 }
3587
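/*
 * Map the KIQ MQD BO, initialize the queue, then mark the ring
 * schedulable. FIXME: the early return after a failed amdgpu_bo_kmap()
 * appears to leave the BO reserved; later kernels unreserve it on that
 * error path.
 */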
3588 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3589 {
3590         struct amdgpu_ring *ring;
3591         int r;
3592
3593         ring = &adev->gfx.kiq.ring;
3594
3595         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3596         if (unlikely(r != 0))
3597                 return r;
3598
3599         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3600         if (unlikely(r != 0))
3601                 return r;
3602
3603         gfx_v9_0_kiq_init_queue(ring);
3604         amdgpu_bo_kunmap(ring->mqd_obj);
3605         ring->mqd_ptr = NULL;
3606         amdgpu_bo_unreserve(ring->mqd_obj);
3607         ring->sched.ready = true;
3608         return 0;
3609 }
3610
3611 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3612 {
3613         struct amdgpu_ring *ring = NULL;
3614         int r = 0, i;
3615
3616         gfx_v9_0_cp_compute_enable(adev, true);
3617
3618         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3619                 ring = &adev->gfx.compute_ring[i];
3620
3621                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3622                 if (unlikely(r != 0))
3623                         goto done;
3624                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3625                 if (!r) {
3626                         r = gfx_v9_0_kcq_init_queue(ring);
3627                         amdgpu_bo_kunmap(ring->mqd_obj);
3628                         ring->mqd_ptr = NULL;
3629                 }
3630                 amdgpu_bo_unreserve(ring->mqd_obj);
3631                 if (r)
3632                         goto done;
3633         }
3634
3635         r = gfx_v9_0_kiq_kcq_enable(adev);
3636 done:
3637         return r;
3638 }
3639
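/*
 * CP bring-up order matters here: load microcode (legacy path only when
 * the PSP is not doing the loading), resume the KIQ first so it can
 * service MAP_QUEUES requests, then the GFX ring (absent on Arcturus),
 * then the KCQs, and finally ring-test everything before re-enabling the
 * GUI idle interrupt.
 */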
3640 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3641 {
3642         int r, i;
3643         struct amdgpu_ring *ring;
3644
3645         if (!(adev->flags & AMD_IS_APU))
3646                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3647
3648         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3649                 if (adev->asic_type != CHIP_ARCTURUS) {
3650                         /* legacy firmware loading */
3651                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3652                         if (r)
3653                                 return r;
3654                 }
3655
3656                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3657                 if (r)
3658                         return r;
3659         }
3660
3661         r = gfx_v9_0_kiq_resume(adev);
3662         if (r)
3663                 return r;
3664
3665         if (adev->asic_type != CHIP_ARCTURUS) {
3666                 r = gfx_v9_0_cp_gfx_resume(adev);
3667                 if (r)
3668                         return r;
3669         }
3670
3671         r = gfx_v9_0_kcq_resume(adev);
3672         if (r)
3673                 return r;
3674
3675         if (adev->asic_type != CHIP_ARCTURUS) {
3676                 ring = &adev->gfx.gfx_ring[0];
3677                 r = amdgpu_ring_test_helper(ring);
3678                 if (r)
3679                         return r;
3680         }
3681
3682         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3683                 ring = &adev->gfx.compute_ring[i];
3684                 amdgpu_ring_test_helper(ring);
3685         }
3686
3687         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3688
3689         return 0;
3690 }
3691
3692 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3693 {
3694         if (adev->asic_type != CHIP_ARCTURUS)
3695                 gfx_v9_0_cp_gfx_enable(adev, enable);
3696         gfx_v9_0_cp_compute_enable(adev, enable);
3697 }
3698
3699 static int gfx_v9_0_hw_init(void *handle)
3700 {
3701         int r;
3702         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3703
3704         if (!amdgpu_sriov_vf(adev))
3705                 gfx_v9_0_init_golden_registers(adev);
3706
3707         gfx_v9_0_constants_init(adev);
3708
3709         r = gfx_v9_0_csb_vram_pin(adev);
3710         if (r)
3711                 return r;
3712
3713         r = adev->gfx.rlc.funcs->resume(adev);
3714         if (r)
3715                 return r;
3716
3717         r = gfx_v9_0_cp_resume(adev);
3718         if (r)
3719                 return r;
3720
3721         return r;
3722 }
3723
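/*
 * Unmap all KCQs through the KIQ: each queue consumes six ring dwords for
 * a PACKET3_UNMAP_QUEUES with the RESET_QUEUES action, selecting the
 * queue by its doorbell offset. The KIQ ring test afterwards effectively
 * waits for the unmap packets to be consumed.
 */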
3724 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3725 {
3726         int r, i;
3727         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3728
3729         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3730         if (r)
3731                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3732
3733         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3734                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3735
3736                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3737                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3738                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3739                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3740                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3741                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3742                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3743                 amdgpu_ring_write(kiq_ring, 0);
3744                 amdgpu_ring_write(kiq_ring, 0);
3745                 amdgpu_ring_write(kiq_ring, 0);
3746         }
3747         r = amdgpu_ring_test_helper(kiq_ring);
3748         if (r)
3749                 DRM_ERROR("KCQ disable failed\n");
3750
3751         return r;
3752 }
3753
3754 static int gfx_v9_0_hw_fini(void *handle)
3755 {
3756         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3757
3758         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3759         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3760         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3761
3762         /* DF freeze and KCQ disable will fail if a RAS interrupt has triggered */
3763         if (!amdgpu_ras_intr_triggered())
3764                 /* disable KCQ so the CPC stops touching memory that is no longer valid */
3765                 gfx_v9_0_kcq_disable(adev);
3766
3767         if (amdgpu_sriov_vf(adev)) {
3768                 gfx_v9_0_cp_gfx_enable(adev, false);
3769                 /* polling must be disabled for SR-IOV once hw_fini completes;
3770                  * otherwise the CPC engine may keep fetching the WB address,
3771                  * which is already invalid after the software side has finished,
3772                  * and trigger DMAR read errors on the hypervisor side.
3773                  */
3774                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3775                 return 0;
3776         }
3777
3778         /* Use the deinitialize sequence from CAIL when unbinding the device
3779          * from the driver, otherwise the KIQ hangs when binding it back.
3780          */
3781         if (!adev->in_gpu_reset && !adev->in_suspend) {
3782                 mutex_lock(&adev->srbm_mutex);
3783                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3784                                 adev->gfx.kiq.ring.pipe,
3785                                 adev->gfx.kiq.ring.queue, 0);
3786                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3787                 soc15_grbm_select(adev, 0, 0, 0, 0);
3788                 mutex_unlock(&adev->srbm_mutex);
3789         }
3790
3791         gfx_v9_0_cp_enable(adev, false);
3792         adev->gfx.rlc.funcs->stop(adev);
3793
3794         gfx_v9_0_csb_vram_unpin(adev);
3795
3796         return 0;
3797 }
3798
3799 static int gfx_v9_0_suspend(void *handle)
3800 {
3801         return gfx_v9_0_hw_fini(handle);
3802 }
3803
3804 static int gfx_v9_0_resume(void *handle)
3805 {
3806         return gfx_v9_0_hw_init(handle);
3807 }
3808
3809 static bool gfx_v9_0_is_idle(void *handle)
3810 {
3811         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3812
3813         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3814                                 GRBM_STATUS, GUI_ACTIVE))
3815                 return false;
3816         else
3817                 return true;
3818 }
3819
3820 static int gfx_v9_0_wait_for_idle(void *handle)
3821 {
3822         unsigned i;
3823         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3824
3825         for (i = 0; i < adev->usec_timeout; i++) {
3826                 if (gfx_v9_0_is_idle(handle))
3827                         return 0;
3828                 udelay(1);
3829         }
3830         return -ETIMEDOUT;
3831 }
3832
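/*
 * Soft reset: derive the needed GRBM soft-reset bits from the busy flags
 * in GRBM_STATUS/GRBM_STATUS2 (CP/GFX for pipeline busy, RLC for RLC
 * busy), quiesce the RLC and CP first, then pulse the bits in
 * GRBM_SOFT_RESET with ~50us settle delays, reading the register back to
 * post the writes.
 */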
3833 static int gfx_v9_0_soft_reset(void *handle)
3834 {
3835         u32 grbm_soft_reset = 0;
3836         u32 tmp;
3837         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3838
3839         /* GRBM_STATUS */
3840         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3841         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3842                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3843                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3844                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3845                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3846                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3847                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3848                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3849                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3850                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3851         }
3852
3853         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3854                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3855                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3856         }
3857
3858         /* GRBM_STATUS2 */
3859         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3860         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3861                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3862                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3863
3864
3865         if (grbm_soft_reset) {
3866                 /* stop the rlc */
3867                 adev->gfx.rlc.funcs->stop(adev);
3868
3869                 if (adev->asic_type != CHIP_ARCTURUS)
3870                         /* Disable GFX parsing/prefetching */
3871                         gfx_v9_0_cp_gfx_enable(adev, false);
3872
3873                 /* Disable MEC parsing/prefetching */
3874                 gfx_v9_0_cp_compute_enable(adev, false);
3875
3876                 if (grbm_soft_reset) {
3877                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3878                         tmp |= grbm_soft_reset;
3879                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3880                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3882
3883                         udelay(50);
3884
3885                         tmp &= ~grbm_soft_reset;
3886                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3887                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3888                 }
3889
3890                 /* Wait a little for things to settle down */
3891                 udelay(50);
3892         }
3893         return 0;
3894 }
3895
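/*
 * For Vega10 under SR-IOV runtime, the 64-bit refclock timestamp is read
 * with an MSB/LSB/MSB sequence, retrying until the two MSB reads match so
 * that a rollover between the two halves cannot produce a torn value.
 * Bare metal latches the counter via RLC_CAPTURE_GPU_CLOCK_COUNT instead.
 */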
3896 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3897 {
3898         uint64_t clock;
3899
3900         mutex_lock(&adev->gfx.gpu_clock_mutex);
3901         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3902                 uint32_t tmp, lsb, msb, i = 0;
3903                 do {
3904                         if (i != 0)
3905                                 udelay(1);
3906                         tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3907                         lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3908                         msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3909                         i++;
3910                 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3911                 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3912         } else {
3913                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3914                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3915                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3916         }
3917         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3918         return clock;
3919 }
3920
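/*
 * Program the per-VMID GDS view: the base/size registers are laid out as
 * pairs (hence the 2 * vmid stride), GWS packs base and size into a
 * single register, and the OA mask is the contiguous bit range
 * (1 << (oa_base + oa_size)) - (1 << oa_base), i.e. bits
 * [oa_base, oa_base + oa_size).
 */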
3921 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3922                                           uint32_t vmid,
3923                                           uint32_t gds_base, uint32_t gds_size,
3924                                           uint32_t gws_base, uint32_t gws_size,
3925                                           uint32_t oa_base, uint32_t oa_size)
3926 {
3927         struct amdgpu_device *adev = ring->adev;
3928
3929         /* GDS Base */
3930         gfx_v9_0_write_data_to_reg(ring, 0, false,
3931                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3932                                    gds_base);
3933
3934         /* GDS Size */
3935         gfx_v9_0_write_data_to_reg(ring, 0, false,
3936                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3937                                    gds_size);
3938
3939         /* GWS */
3940         gfx_v9_0_write_data_to_reg(ring, 0, false,
3941                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3942                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3943
3944         /* OA */
3945         gfx_v9_0_write_data_to_reg(ring, 0, false,
3946                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3947                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3948 }
3949
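/*
 * Pre-assembled GFX9 compute shader binaries (raw dwords) used by the EDC
 * workarounds below: they loop over the VGPR/SGPR files writing known
 * values so every GPR's ECC state is initialized before RAS error
 * counting starts. The intent is inferred from the dispatch setup in
 * gfx_v9_0_do_edc_gpr_workarounds().
 */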
3950 static const u32 vgpr_init_compute_shader[] =
3951 {
3952         0xb07c0000, 0xbe8000ff,
3953         0x000000f8, 0xbf110800,
3954         0x7e000280, 0x7e020280,
3955         0x7e040280, 0x7e060280,
3956         0x7e080280, 0x7e0a0280,
3957         0x7e0c0280, 0x7e0e0280,
3958         0x80808800, 0xbe803200,
3959         0xbf84fff5, 0xbf9c0000,
3960         0xd28c0001, 0x0001007f,
3961         0xd28d0001, 0x0002027e,
3962         0x10020288, 0xb8810904,
3963         0xb7814000, 0xd1196a01,
3964         0x00000301, 0xbe800087,
3965         0xbefc00c1, 0xd89c4000,
3966         0x00020201, 0xd89cc080,
3967         0x00040401, 0x320202ff,
3968         0x00000800, 0x80808100,
3969         0xbf84fff8, 0x7e020280,
3970         0xbf810000, 0x00000000,
3971 };
3972
3973 static const u32 sgpr_init_compute_shader[] =
3974 {
3975         0xb07c0000, 0xbe8000ff,
3976         0x0000005f, 0xbee50080,
3977         0xbe812c65, 0xbe822c65,
3978         0xbe832c65, 0xbe842c65,
3979         0xbe852c65, 0xb77c0005,
3980         0x80808500, 0xbf84fff8,
3981         0xbe800080, 0xbf810000,
3982 };
3983
3984 static const struct soc15_reg_entry vgpr_init_regs[] = {
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3989    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3990    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3991    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3995 };
3996
3997 static const struct soc15_reg_entry sgpr_init_regs[] = {
3998    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3999    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4000    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4001    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4002    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4003    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4004    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4005    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4006    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4007    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4008 };
4009
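/*
 * EDC/SEC-DED counter registers cleared by reading them back after the
 * workaround dispatches. The two trailing numbers in each entry are taken
 * here to be the shader-engine count and instance count walked by the
 * select_se_sh() loops further down.
 */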
4010 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4011    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4012    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4013    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4014    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4015    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4016    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4017    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4018    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4019    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4020    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4021    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4022    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4023    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4024    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4025    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4026    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4027    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4028    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4029    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4030    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4031    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4032    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4033    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4034    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4035    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4036    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4037    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4038    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4039    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4040    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4041    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4042    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4043    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4044 };
4045
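/*
 * GDS EDC workaround: a single DMA_DATA packet (DST_SEL=1 selecting GDS,
 * per the packet encoding) writes the whole GDS aperture so its ECC bits
 * start from a known state; completion is detected by polling until the
 * compute ring's rptr catches up with wptr.
 */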
4046 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4047 {
4048         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4049         int i, r;
4050
4051         /* only support when RAS is enabled */
4052         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4053                 return 0;
4054
4055         r = amdgpu_ring_alloc(ring, 7);
4056         if (r) {
4057                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4058                         ring->name, r);
4059                 return r;
4060         }
4061
4062         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4063         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4064
4065         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4066         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4067                                 PACKET3_DMA_DATA_DST_SEL(1) |
4068                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4069                                 PACKET3_DMA_DATA_ENGINE(0)));
4070         amdgpu_ring_write(ring, 0);
4071         amdgpu_ring_write(ring, 0);
4072         amdgpu_ring_write(ring, 0);
4073         amdgpu_ring_write(ring, 0);
4074         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4075                                 adev->gds.gds_size);
4076
4077         amdgpu_ring_commit(ring);
4078
4079         for (i = 0; i < adev->usec_timeout; i++) {
4080                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4081                         break;
4082                 udelay(1);
4083         }
4084
4085         if (i >= adev->usec_timeout)
4086                 r = -ETIMEDOUT;
4087
4088         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4089
4090         return r;
4091 }
4092
4093 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4094 {
4095         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4096         struct amdgpu_ib ib;
4097         struct dma_fence *f = NULL;
4098         int r, i, j, k;
4099         unsigned total_size, vgpr_offset, sgpr_offset;
4100         u64 gpu_addr;
4101
4102         /* only support when RAS is enabled */
4103         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4104                 return 0;
4105
4106         /* bail if the compute ring is not ready */
4107         if (!ring->sched.ready)
4108                 return 0;
4109
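        /*
         * IB size accounting (matches the packet stream built below): each
         * init register costs 3 dwords (SET_SH_REG header, offset, value),
         * plus 4 dwords for the PGM_LO/HI write, 5 for DISPATCH_DIRECT and
         * 2 for the CS partial flush event, all times 4 bytes per dword;
         * the shader binaries are appended at 256-byte-aligned offsets.
         */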
4110         total_size =
4111                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4112         total_size +=
4113                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4114         total_size = ALIGN(total_size, 256);
4115         vgpr_offset = total_size;
4116         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4117         sgpr_offset = total_size;
4118         total_size += sizeof(sgpr_init_compute_shader);
4119
4120         /* allocate an indirect buffer to put the commands in */
4121         memset(&ib, 0, sizeof(ib));
4122         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4123         if (r) {
4124                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4125                 return r;
4126         }
4127
4128         /* load the compute shaders */
4129         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4130                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4131
4132         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4133                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4134
4135         /* init the ib length to 0 */
4136         ib.length_dw = 0;
4137
4138         /* VGPR */
4139         /* write the register state for the compute dispatch */
4140         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4141                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4142                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4143                                                                 - PACKET3_SET_SH_REG_START;
4144                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4145         }
4146         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4147         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4148         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4149         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4150                                                         - PACKET3_SET_SH_REG_START;
4151         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4152         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4153
4154         /* write dispatch packet */
4155         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4156         ib.ptr[ib.length_dw++] = 128; /* x */
4157         ib.ptr[ib.length_dw++] = 1; /* y */
4158         ib.ptr[ib.length_dw++] = 1; /* z */
4159         ib.ptr[ib.length_dw++] =
4160                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4161
4162         /* write CS partial flush packet */
4163         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4164         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4165
4166         /* SGPR */
4167         /* write the register state for the compute dispatch */
4168         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4169                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4170                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4171                                                                 - PACKET3_SET_SH_REG_START;
4172                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4173         }
4174         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4175         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4176         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4177         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4178                                                         - PACKET3_SET_SH_REG_START;
4179         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4180         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4181
4182         /* write dispatch packet */
4183         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4184         ib.ptr[ib.length_dw++] = 128; /* x */
4185         ib.ptr[ib.length_dw++] = 1; /* y */
4186         ib.ptr[ib.length_dw++] = 1; /* z */
4187         ib.ptr[ib.length_dw++] =
4188                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4189
4190         /* write CS partial flush packet */
4191         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4192         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4193
4194         /* schedule the IB on the ring */
4195         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4196         if (r) {
4197                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4198                 goto fail;
4199         }
4200
4201         /* wait for the GPU to finish processing the IB */
4202         r = dma_fence_wait(f, false);
4203         if (r) {
4204                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4205                 goto fail;
4206         }
4207
4208         /* read back registers to clear the counters */
4209         mutex_lock(&adev->grbm_idx_mutex);
4210         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4211                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4212                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4213                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4214                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4215                         }
4216                 }
4217         }
4218         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4219         mutex_unlock(&adev->grbm_idx_mutex);
4220
4221 fail:
4222         amdgpu_ib_free(adev, &ib, NULL);
4223         dma_fence_put(f);
4224
4225         return r;
4226 }
4227
4228 static int gfx_v9_0_early_init(void *handle)
4229 {
4230         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4231
4232         if (adev->asic_type == CHIP_ARCTURUS)
4233                 adev->gfx.num_gfx_rings = 0;
4234         else
4235                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4236         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4237         gfx_v9_0_set_ring_funcs(adev);
4238         gfx_v9_0_set_irq_funcs(adev);
4239         gfx_v9_0_set_gds_init(adev);
4240         gfx_v9_0_set_rlc_funcs(adev);
4241
4242         return 0;
4243 }
4244
4245 static int gfx_v9_0_ecc_late_init(void *handle)
4246 {
4247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248         int r;
4249
4250         r = amdgpu_gfx_ras_late_init(adev);
4251         if (r)
4252                 return r;
4253
4254         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4255         if (r)
4256                 return r;
4257
4258         /* requires IBs so do in late init after IB pool is initialized */
4259         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4260         if (r)
4261                 return r;
4262
4263         return 0;
4264 }
4265
4266 static int gfx_v9_0_late_init(void *handle)
4267 {
4268         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4269         int r;
4270
4271         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4272         if (r)
4273                 return r;
4274
4275         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4276         if (r)
4277                 return r;
4278
4279         r = gfx_v9_0_ecc_late_init(handle);
4280         if (r)
4281                 return r;
4282
4283         return 0;
4284 }
4285
4286 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4287 {
4288         uint32_t rlc_setting;
4289
4290         /* if RLC is not enabled, do nothing */
4291         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4292         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4293                 return false;
4294
4295         return true;
4296 }
4297
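/*
 * Enter RLC safe mode by writing CMD=1 with a MESSAGE payload into
 * RLC_SAFE_MODE, then poll until the RLC acknowledges by clearing the CMD
 * field (bounded by adev->usec_timeout). The unset path below only issues
 * the command and does not wait.
 */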
4298 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4299 {
4300         uint32_t data;
4301         unsigned i;
4302
4303         data = RLC_SAFE_MODE__CMD_MASK;
4304         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4305         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4306
4307         /* wait for RLC_SAFE_MODE */
4308         for (i = 0; i < adev->usec_timeout; i++) {
4309                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4310                         break;
4311                 udelay(1);
4312         }
4313 }
4314
4315 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4316 {
4317         uint32_t data;
4318
4319         data = RLC_SAFE_MODE__CMD_MASK;
4320         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4321 }
4322
4323 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4324                                                 bool enable)
4325 {
4326         amdgpu_gfx_rlc_enter_safe_mode(adev);
4327
4328         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4329                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4330                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4331                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4332         } else {
4333                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4334                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4335                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4336         }
4337
4338         amdgpu_gfx_rlc_exit_safe_mode(adev);
4339 }
4340
4341 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4342                                                 bool enable)
4343 {
4344         /* TODO: double check if we need to perform under safe mode */
4345         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4346
4347         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4348                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4349         else
4350                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4351
4352         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4353                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4354         else
4355                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4356
4357         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4358 }
4359
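/*
 * The clock-gating updates below share one pattern: enter RLC safe mode,
 * read-modify-write the override/control registers, and write back only
 * when the value actually changed (the def/data comparison) to avoid
 * redundant register traffic, then exit safe mode.
 */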
4360 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4361                                                       bool enable)
4362 {
4363         uint32_t data, def;
4364
4365         amdgpu_gfx_rlc_enter_safe_mode(adev);
4366
4367         /* It is disabled by HW by default */
4368         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4369                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4370                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4371
4372                 if (adev->asic_type != CHIP_VEGA12)
4373                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4374
4375                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4376                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4377                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4378
4379                 /* only for Vega10 & Raven1 */
4380                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4381
4382                 if (def != data)
4383                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4384
4385                 /* MGLS is a global flag to control all MGLS in GFX */
4386                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4387                         /* 2 - RLC memory Light sleep */
4388                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4389                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4390                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4391                                 if (def != data)
4392                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4393                         }
4394                         /* 3 - CP memory Light sleep */
4395                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4396                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4397                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4398                                 if (def != data)
4399                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4400                         }
4401                 }
4402         } else {
4403                 /* 1 - MGCG_OVERRIDE */
4404                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4405
4406                 if (adev->asic_type != CHIP_VEGA12)
4407                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4408
4409                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4410                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4411                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4412                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4413
4414                 if (def != data)
4415                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4416
4417                 /* 2 - disable MGLS in RLC */
4418                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4419                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4420                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4421                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4422                 }
4423
4424                 /* 3 - disable MGLS in CP */
4425                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4426                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4427                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4428                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4429                 }
4430         }
4431
4432         amdgpu_gfx_rlc_exit_safe_mode(adev);
4433 }
4434
4435 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4436                                            bool enable)
4437 {
4438         uint32_t data, def;
4439
4440         if (adev->asic_type == CHIP_ARCTURUS)
4441                 return;
4442
4443         amdgpu_gfx_rlc_enter_safe_mode(adev);
4444
4445         /* Enable 3D CGCG/CGLS */
4446         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4447                 /* write cmd to clear cgcg/cgls ov */
4448                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4449                 /* unset CGCG override */
4450                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4451                 /* update CGCG and CGLS override bits */
4452                 if (def != data)
4453                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4454
4455                 /* enable 3Dcgcg FSM(0x0000363f) */
4456                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4457
4458                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4459                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4460                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4461                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4462                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4463                 if (def != data)
4464                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4465
4466                 /* set IDLE_POLL_COUNT(0x00900100) */
4467                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4468                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4469                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4470                 if (def != data)
4471                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4472         } else {
4473                 /* Disable CGCG/CGLS */
4474                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4475                 /* disable cgcg, cgls should be disabled */
4476                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4477                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4478                 /* disable cgcg and cgls in FSM */
4479                 if (def != data)
4480                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4481         }
4482
4483         amdgpu_gfx_rlc_exit_safe_mode(adev);
4484 }
4485
4486 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4487                                                       bool enable)
4488 {
4489         uint32_t def, data;
4490
4491         amdgpu_gfx_rlc_enter_safe_mode(adev);
4492
4493         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4494                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4495                 /* unset CGCG override */
4496                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4497                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4498                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4499                 else
4500                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4501                 /* update CGCG and CGLS override bits */
4502                 if (def != data)
4503                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4504
4505                 /* enable cgcg FSM(0x0000363F) */
4506                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4507
4508                 if (adev->asic_type == CHIP_ARCTURUS)
4509                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4510                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4511                 else
4512                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4513                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4514                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4515                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4516                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4517                 if (def != data)
4518                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4519
4520                 /* set IDLE_POLL_COUNT(0x00900100) */
4521                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4522                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4523                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4524                 if (def != data)
4525                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4526         } else {
4527                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4528                 /* reset CGCG/CGLS bits */
4529                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4530                 /* disable cgcg and cgls in FSM */
4531                 if (def != data)
4532                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4533         }
4534
4535         amdgpu_gfx_rlc_exit_safe_mode(adev);
4536 }
4537
4538 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4539                                             bool enable)
4540 {
4541         if (enable) {
4542                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4543                  * ===  MGCG + MGLS ===
4544                  */
4545                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4546                 /* ===  CGCG /CGLS for GFX 3D Only === */
4547                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4548                 /* ===  CGCG + CGLS === */
4549                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4550         } else {
4551                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4552                  * ===  CGCG + CGLS ===
4553                  */
4554                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4555                 /* ===  CGCG /CGLS for GFX 3D Only === */
4556                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4557                 /* ===  MGCG + MGLS === */
4558                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4559         }
4560         return 0;
4561 }
4562
4563 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4564         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4565         .set_safe_mode = gfx_v9_0_set_safe_mode,
4566         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4567         .init = gfx_v9_0_rlc_init,
4568         .get_csb_size = gfx_v9_0_get_csb_size,
4569         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4570         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4571         .resume = gfx_v9_0_rlc_resume,
4572         .stop = gfx_v9_0_rlc_stop,
4573         .reset = gfx_v9_0_rlc_reset,
4574         .start = gfx_v9_0_rlc_start
4575 };
4576
4577 static int gfx_v9_0_set_powergating_state(void *handle,
4578                                           enum amd_powergating_state state)
4579 {
4580         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4581         bool enable = (state == AMD_PG_STATE_GATE);
4582
4583         switch (adev->asic_type) {
4584         case CHIP_RAVEN:
4585         case CHIP_RENOIR:
4586                 if (!enable) {
4587                         amdgpu_gfx_off_ctrl(adev, false);
4588                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4589                 }
4590                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4591                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4592                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4593                 } else {
4594                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4595                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4596                 }
4597
4598                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4599                         gfx_v9_0_enable_cp_power_gating(adev, true);
4600                 else
4601                         gfx_v9_0_enable_cp_power_gating(adev, false);
4602
4603                 /* update gfx cgpg state */
4604                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4605
4606                 /* update mgcg state */
4607                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4608
4609                 if (enable)
4610                         amdgpu_gfx_off_ctrl(adev, true);
4611                 break;
4612         case CHIP_VEGA12:
4613                 if (!enable) {
4614                         amdgpu_gfx_off_ctrl(adev, false);
4615                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4616                 } else {
4617                         amdgpu_gfx_off_ctrl(adev, true);
4618                 }
4619                 break;
4620         default:
4621                 break;
4622         }
4623
4624         return 0;
4625 }
4626
4627 static int gfx_v9_0_set_clockgating_state(void *handle,
4628                                           enum amd_clockgating_state state)
4629 {
4630         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4631
4632         if (amdgpu_sriov_vf(adev))
4633                 return 0;
4634
4635         switch (adev->asic_type) {
4636         case CHIP_VEGA10:
4637         case CHIP_VEGA12:
4638         case CHIP_VEGA20:
4639         case CHIP_RAVEN:
4640         case CHIP_ARCTURUS:
4641         case CHIP_RENOIR:
4642                 gfx_v9_0_update_gfx_clock_gating(adev,
4643                                                  state == AMD_CG_STATE_GATE);
4644                 break;
4645         default:
4646                 break;
4647         }
4648         return 0;
4649 }
4650
4651 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4652 {
4653         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4654         int data;
4655
4656         if (amdgpu_sriov_vf(adev))
4657                 *flags = 0;
4658
4659         /* AMD_CG_SUPPORT_GFX_MGCG */
4660         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4661         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4662                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4663
4664         /* AMD_CG_SUPPORT_GFX_CGCG */
4665         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4666         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4667                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4668
4669         /* AMD_CG_SUPPORT_GFX_CGLS */
4670         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4671                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4672
4673         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4674         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4675         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4676                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4677
4678         /* AMD_CG_SUPPORT_GFX_CP_LS */
4679         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4680         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4681                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4682
4683         if (adev->asic_type != CHIP_ARCTURUS) {
4684                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4685                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4686                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4687                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4688
4689                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4690                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4691                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4692         }
4693 }
4694
4695 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4696 {
4697         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4698 }
4699
4700 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4701 {
4702         struct amdgpu_device *adev = ring->adev;
4703         u64 wptr;
4704
4705         /* XXX check if swapping is necessary on BE */
4706         if (ring->use_doorbell) {
4707                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4708         } else {
4709                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4710                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4711         }
4712
4713         return wptr;
4714 }
4715
4716 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4717 {
4718         struct amdgpu_device *adev = ring->adev;
4719
4720         if (ring->use_doorbell) {
4721                 /* XXX check if swapping is necessary on BE */
4722                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4723                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4724         } else {
4725                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4726                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4727         }
4728 }
4729
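/*
 * HDP flush: pick the per-client ref/mask bit from the NBIO flush
 * registers (cp2/cp6 shifted by pipe for the two compute MEs, cp0 for
 * GFX where the PFP engine is used) and hand the request/done register
 * offsets to gfx_v9_0_wait_reg_mem(), which emits the packet that posts
 * the flush and waits for the matching done bit.
 */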
4730 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4731 {
4732         struct amdgpu_device *adev = ring->adev;
4733         u32 ref_and_mask, reg_mem_engine;
4734         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4735
4736         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4737                 switch (ring->me) {
4738                 case 1:
4739                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4740                         break;
4741                 case 2:
4742                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4743                         break;
4744                 default:
4745                         return;
4746                 }
4747                 reg_mem_engine = 0;
4748         } else {
4749                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4750                 reg_mem_engine = 1; /* pfp */
4751         }
4752
4753         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4754                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4755                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4756                               ref_and_mask, ref_and_mask, 0x20);
4757 }
4758
4759 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4760                                         struct amdgpu_job *job,
4761                                         struct amdgpu_ib *ib,
4762                                         uint32_t flags)
4763 {
4764         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4765         u32 header, control = 0;
4766
4767         if (ib->flags & AMDGPU_IB_FLAG_CE)
4768                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4769         else
4770                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4771
4772         control |= ib->length_dw | (vmid << 24);
4773
4774         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4775                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4776
4777                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4778                         gfx_v9_0_ring_emit_de_meta(ring);
4779         }
4780
4781         amdgpu_ring_write(ring, header);
4782         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4783         amdgpu_ring_write(ring,
4784 #ifdef __BIG_ENDIAN
4785                 (2 << 0) |
4786 #endif
4787                 lower_32_bits(ib->gpu_addr));
4788         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4789         amdgpu_ring_write(ring, control);
4790 }
4791
4792 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4793                                           struct amdgpu_job *job,
4794                                           struct amdgpu_ib *ib,
4795                                           uint32_t flags)
4796 {
4797         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4798         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4799
4800         /* Currently, there is a high likelihood of a wave ID mismatch
4801          * between ME and GDS, leading to a hw deadlock, because ME generates
4802          * different wave IDs than the GDS expects. This situation happens
4803          * randomly when at least 5 compute pipes use GDS ordered append.
4804          * The wave IDs generated by ME are also wrong after suspend/resume.
4805          * Those are probably bugs somewhere else in the kernel driver.
4806          *
4807          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4808          * GDS to 0 for this ring (me/pipe).
4809          */
4810         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4811                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4812                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4813                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4814         }
4815
4816         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4817         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4818         amdgpu_ring_write(ring,
4819 #ifdef __BIG_ENDIAN
4820                                 (2 << 0) |
4821 #endif
4822                                 lower_32_bits(ib->gpu_addr));
4823         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4824         amdgpu_ring_write(ring, control);
4825 }
4826
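/*
 * Emit an end-of-pipe fence.  RELEASE_MEM flushes the selected caches
 * on the EOP event, then writes the sequence number (64- or 32-bit,
 * per DATA_SEL) to "addr" and optionally raises an interrupt via
 * INT_SEL.  With AMDGPU_FENCE_FLAG_TC_WB_ONLY only a TC writeback is
 * requested instead of the full TCL1/TC/metadata flush.
 */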
4827 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4828                                      u64 seq, unsigned flags)
4829 {
4830         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4831         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4832         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4833
4834         /* RELEASE_MEM - flush caches, send int */
4835         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4836         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4837                                                EOP_TC_NC_ACTION_EN) :
4838                                               (EOP_TCL1_ACTION_EN |
4839                                                EOP_TC_ACTION_EN |
4840                                                EOP_TC_WB_ACTION_EN |
4841                                                EOP_TC_MD_ACTION_EN)) |
4842                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4843                                  EVENT_INDEX(5)));
4844         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4845
4846         /*
4847          * The address must be Qword aligned for a 64-bit write and Dword
4848          * aligned if only the low 32 bits are written (high bits discarded).
4849          */
4850         if (write64bit)
4851                 BUG_ON(addr & 0x7);
4852         else
4853                 BUG_ON(addr & 0x3);
4854         amdgpu_ring_write(ring, lower_32_bits(addr));
4855         amdgpu_ring_write(ring, upper_32_bits(addr));
4856         amdgpu_ring_write(ring, lower_32_bits(seq));
4857         amdgpu_ring_write(ring, upper_32_bits(seq));
4858         amdgpu_ring_write(ring, 0);
4859 }
4860
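/*
 * Wait for previously emitted fences on this ring: a WAIT_REG_MEM on
 * the fence memory location polls until it equals sync_seq.  On the
 * gfx ring the PFP performs the wait, so command fetching stalls as
 * well.
 */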
4861 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4862 {
4863         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4864         uint32_t seq = ring->fence_drv.sync_seq;
4865         uint64_t addr = ring->fence_drv.gpu_addr;
4866
4867         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4868                               lower_32_bits(addr), upper_32_bits(addr),
4869                               seq, 0xffffffff, 4);
4870 }
4871
4872 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4873                                         unsigned vmid, uint64_t pd_addr)
4874 {
4875         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4876
4877         /* compute doesn't have PFP */
4878         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4879                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4880                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4881                 amdgpu_ring_write(ring, 0x0);
4882         }
4883 }
4884
4885 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4886 {
4887         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4888 }
4889
4890 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4891 {
4892         u64 wptr;
4893
4894         /* XXX check if swapping is necessary on BE */
4895         if (ring->use_doorbell)
4896                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4897         else
4898                 BUG();
4899         return wptr;
4900 }
4901
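/*
 * Throttle or restore wave launches for one pipe through its
 * SPI_WCL_PIPE_PERCENT_* register: the full VALUE mask on acquire,
 * the minimum (0x1) on release.  The first ME only exposes the GFX
 * and HP3D entries, hence the pipe_num adjustment for ME > 0.
 */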
4902 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4903                                            bool acquire)
4904 {
4905         struct amdgpu_device *adev = ring->adev;
4906         int pipe_num, tmp, reg;
4907         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4908
4909         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4910
4911         /* first me only has 2 entries, GFX and HP3D */
4912         if (ring->me > 0)
4913                 pipe_num -= 2;
4914
4915         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4916         tmp = RREG32(reg);
4917         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4918         WREG32(reg, tmp);
4919 }
4920
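/*
 * Track which pipes host a high-priority queue in pipe_reserve_bitmap.
 * While any reservation is held, every pipe without one is throttled
 * via gfx_v9_0_ring_set_pipe_percent(); once the bitmap empties, all
 * gfx and compute pipes are restored to full wave-launch capacity.
 */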
4921 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4922                                             struct amdgpu_ring *ring,
4923                                             bool acquire)
4924 {
4925         int i, pipe;
4926         bool reserve;
4927         struct amdgpu_ring *iring;
4928
4929         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4930         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4931         if (acquire)
4932                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4933         else
4934                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4935
4936         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4937                 /* Clear all reservations - everyone reacquires all resources */
4938                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4939                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4940                                                        true);
4941
4942                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4943                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4944                                                        true);
4945         } else {
4946                 /* Lower all pipes without a current reservation */
4947                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4948                         iring = &adev->gfx.gfx_ring[i];
4949                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4950                                                            iring->me,
4951                                                            iring->pipe,
4952                                                            0);
4953                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4954                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4955                 }
4956
4957                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4958                         iring = &adev->gfx.compute_ring[i];
4959                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4960                                                            iring->me,
4961                                                            iring->pipe,
4962                                                            0);
4963                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4964                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4965                 }
4966         }
4967
4968         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4969 }
4970
4971 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4972                                       struct amdgpu_ring *ring,
4973                                       bool acquire)
4974 {
4975         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4976         uint32_t queue_priority = acquire ? 0xf : 0x0;
4977
4978         mutex_lock(&adev->srbm_mutex);
4979         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4980
4981         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4982         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4983
4984         soc15_grbm_select(adev, 0, 0, 0, 0);
4985         mutex_unlock(&adev->srbm_mutex);
4986 }
4987
4988 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4989                                                enum drm_sched_priority priority)
4990 {
4991         struct amdgpu_device *adev = ring->adev;
4992         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4993
4994         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4995                 return;
4996
4997         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4998         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4999 }
5000
5001 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5002 {
5003         struct amdgpu_device *adev = ring->adev;
5004
5005         /* XXX check if swapping is necessary on BE */
5006         if (ring->use_doorbell) {
5007                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5008                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5009         } else {
5010                 BUG(); /* only DOORBELL method supported on gfx9 now */
5011         }
5012 }
5013
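/*
 * KIQ fence: a plain WRITE_DATA of the low 32 bits of the sequence
 * number to the writeback address (64-bit fences are rejected by the
 * BUG_ON below).  When an interrupt is requested, a second WRITE_DATA
 * pokes CPC_INT_STATUS to raise the generic KIQ interrupt (src_id 178).
 */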
5014 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5015                                          u64 seq, unsigned int flags)
5016 {
5017         struct amdgpu_device *adev = ring->adev;
5018
5019         /* we only allocate 32bit for each seq wb address */
5020         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5021
5022         /* write fence seq to the "addr" */
5023         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5024         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5025                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5026         amdgpu_ring_write(ring, lower_32_bits(addr));
5027         amdgpu_ring_write(ring, upper_32_bits(addr));
5028         amdgpu_ring_write(ring, lower_32_bits(seq));
5029
5030         if (flags & AMDGPU_FENCE_FLAG_INT) {
5031                 /* set register to trigger INT */
5032                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5033                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5034                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5035                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5036                 amdgpu_ring_write(ring, 0);
5037                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5038         }
5039 }
5040
5041 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5042 {
5043         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5044         amdgpu_ring_write(ring, 0);
5045 }
5046
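/*
 * Write a zeroed CE payload into the ce_payload slot of the CSA
 * (context save area).  Together with gfx_v9_0_ring_emit_de_meta()
 * below, this appears to seed the mid-command-buffer preemption state
 * used under SR-IOV; the DE variant also points the GDS backup area
 * at csa_addr + 4096.
 */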
5047 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5048 {
5049         struct v9_ce_ib_state ce_payload = {0};
5050         uint64_t csa_addr;
5051         int cnt;
5052
5053         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5054         csa_addr = amdgpu_csa_vaddr(ring->adev);
5055
5056         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5057         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5058                                  WRITE_DATA_DST_SEL(8) |
5059                                  WR_CONFIRM) |
5060                                  WRITE_DATA_CACHE_POLICY(0));
5061         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5062         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5063         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5064 }
5065
5066 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5067 {
5068         struct v9_de_ib_state de_payload = {0};
5069         uint64_t csa_addr, gds_addr;
5070         int cnt;
5071
5072         csa_addr = amdgpu_csa_vaddr(ring->adev);
5073         gds_addr = csa_addr + 4096;
5074         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5075         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5076
5077         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5078         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5079         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5080                                  WRITE_DATA_DST_SEL(8) |
5081                                  WR_CONFIRM) |
5082                                  WRITE_DATA_CACHE_POLICY(0));
5083         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5084         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5085         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5086 }
5087
5088 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5089 {
5090         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5091         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5092 }
5093
5094 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5095 {
5096         uint32_t dw2 = 0;
5097
5098         if (amdgpu_sriov_vf(ring->adev))
5099                 gfx_v9_0_ring_emit_ce_meta(ring);
5100
5101         gfx_v9_0_ring_emit_tmz(ring, true);
5102
5103         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5104         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5105                 /* set load_global_config & load_global_uconfig */
5106                 dw2 |= 0x8001;
5107                 /* set load_cs_sh_regs */
5108                 dw2 |= 0x01000000;
5109                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5110                 dw2 |= 0x10002;
5111
5112                 /* set load_ce_ram if preamble presented */
5113                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5114                         dw2 |= 0x10000000;
5115         } else {
5116                 /* still load_ce_ram if this is the first time the preamble
5117                  * is presented, even though no context switch happens.
5118                  */
5119                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5120                         dw2 |= 0x10000000;
5121         }
5122
5123         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5124         amdgpu_ring_write(ring, dw2);
5125         amdgpu_ring_write(ring, 0);
5126 }
5127
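/*
 * Open a conditional-execution window: COND_EXEC is emitted with a
 * dummy DW count (0x55aa55aa) and the ring offset of that DW is
 * returned so gfx_v9_0_ring_emit_patch_cond_exec() can later patch in
 * the real number of DWs to skip when *cond_exe_gpu_addr reads 0.
 * The patch helper below handles the ring-buffer wrap case explicitly.
 */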
5128 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5129 {
5130         unsigned ret;
5131         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5132         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5133         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5134         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5135         ret = ring->wptr & ring->buf_mask;
5136         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5137         return ret;
5138 }
5139
5140 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5141 {
5142         unsigned cur;
5143         BUG_ON(offset > ring->buf_mask);
5144         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5145
5146         cur = (ring->wptr & ring->buf_mask) - 1;
5147         if (likely(cur > offset))
5148                 ring->ring[offset] = cur - offset;
5149         else
5150                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5151 }
5152
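/*
 * Ring-based register read (used through the KIQ under SR-IOV):
 * COPY_DATA moves the register value into the virt.reg_val_offs
 * writeback slot, from which the caller reads it back once the ring
 * has executed the packet.
 */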
5153 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5154 {
5155         struct amdgpu_device *adev = ring->adev;
5156
5157         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5158         amdgpu_ring_write(ring, 0 |     /* src: register */
5159                                 (5 << 8) |      /* dst: memory */
5160                                 (1 << 20));     /* write confirm */
5161         amdgpu_ring_write(ring, reg);
5162         amdgpu_ring_write(ring, 0);
5163         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5164                                 adev->virt.reg_val_offs * 4));
5165         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5166                                 adev->virt.reg_val_offs * 4));
5167 }
5168
5169 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5170                                     uint32_t val)
5171 {
5172         uint32_t cmd = 0;
5173
5174         switch (ring->funcs->type) {
5175         case AMDGPU_RING_TYPE_GFX:
5176                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5177                 break;
5178         case AMDGPU_RING_TYPE_KIQ:
5179                 cmd = (1 << 16); /* no inc addr */
5180                 break;
5181         default:
5182                 cmd = WR_CONFIRM;
5183                 break;
5184         }
5185         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5186         amdgpu_ring_write(ring, cmd);
5187         amdgpu_ring_write(ring, reg);
5188         amdgpu_ring_write(ring, 0);
5189         amdgpu_ring_write(ring, val);
5190 }
5191
5192 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5193                                         uint32_t val, uint32_t mask)
5194 {
5195         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5196 }
5197
5198 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5199                                                   uint32_t reg0, uint32_t reg1,
5200                                                   uint32_t ref, uint32_t mask)
5201 {
5202         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5203         struct amdgpu_device *adev = ring->adev;
5204         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5205                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5206
5207         if (fw_version_ok)
5208                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5209                                       ref, mask, 0x20);
5210         else
5211                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5212                                                            ref, mask);
5213 }
5214
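/*
 * Soft recovery: kill the waves of the offending VMID through SQ_CMD
 * (CMD 0x3 kill, MODE 0x1 broadcast, CHECK_VMID set) instead of
 * escalating to a full GPU reset.
 */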
5215 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5216 {
5217         struct amdgpu_device *adev = ring->adev;
5218         uint32_t value = 0;
5219
5220         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5221         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5222         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5223         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5224         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5225 }
5226
5227 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5228                                                  enum amdgpu_interrupt_state state)
5229 {
5230         switch (state) {
5231         case AMDGPU_IRQ_STATE_DISABLE:
5232         case AMDGPU_IRQ_STATE_ENABLE:
5233                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5234                                TIME_STAMP_INT_ENABLE,
5235                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5236                 break;
5237         default:
5238                 break;
5239         }
5240 }
5241
5242 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5243                                                      int me, int pipe,
5244                                                      enum amdgpu_interrupt_state state)
5245 {
5246         u32 mec_int_cntl, mec_int_cntl_reg;
5247
5248         /*
5249          * amdgpu controls only the first MEC. That's why this function only
5250          * handles the setting of interrupts for this specific MEC. All other
5251          * pipes' interrupts are set by amdkfd.
5252          */
5253
5254         if (me == 1) {
5255                 switch (pipe) {
5256                 case 0:
5257                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5258                         break;
5259                 case 1:
5260                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5261                         break;
5262                 case 2:
5263                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5264                         break;
5265                 case 3:
5266                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5267                         break;
5268                 default:
5269                         DRM_DEBUG("invalid pipe %d\n", pipe);
5270                         return;
5271                 }
5272         } else {
5273                 DRM_DEBUG("invalid me %d\n", me);
5274                 return;
5275         }
5276
5277         switch (state) {
5278         case AMDGPU_IRQ_STATE_DISABLE:
5279                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5280                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5281                                              TIME_STAMP_INT_ENABLE, 0);
5282                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5283                 break;
5284         case AMDGPU_IRQ_STATE_ENABLE:
5285                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5286                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5287                                              TIME_STAMP_INT_ENABLE, 1);
5288                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5289                 break;
5290         default:
5291                 break;
5292         }
5293 }
5294
5295 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5296                                              struct amdgpu_irq_src *source,
5297                                              unsigned type,
5298                                              enum amdgpu_interrupt_state state)
5299 {
5300         switch (state) {
5301         case AMDGPU_IRQ_STATE_DISABLE:
5302         case AMDGPU_IRQ_STATE_ENABLE:
5303                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5304                                PRIV_REG_INT_ENABLE,
5305                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5306                 break;
5307         default:
5308                 break;
5309         }
5310
5311         return 0;
5312 }
5313
5314 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5315                                               struct amdgpu_irq_src *source,
5316                                               unsigned type,
5317                                               enum amdgpu_interrupt_state state)
5318 {
5319         switch (state) {
5320         case AMDGPU_IRQ_STATE_DISABLE:
5321         case AMDGPU_IRQ_STATE_ENABLE:
5322                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5323                                PRIV_INSTR_INT_ENABLE,
5324                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5325         default:
5326                 break;
5327         }
5328
5329         return 0;
5330 }
5331
5332 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5333         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5334                         CP_ECC_ERROR_INT_ENABLE, 1)
5335
5336 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5337         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5338                         CP_ECC_ERROR_INT_ENABLE, 0)
5339
5340 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5341                                               struct amdgpu_irq_src *source,
5342                                               unsigned type,
5343                                               enum amdgpu_interrupt_state state)
5344 {
5345         switch (state) {
5346         case AMDGPU_IRQ_STATE_DISABLE:
5347                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5348                                 CP_ECC_ERROR_INT_ENABLE, 0);
5349                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5350                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5351                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5352                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5353                 break;
5354
5355         case AMDGPU_IRQ_STATE_ENABLE:
5356                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5357                                 CP_ECC_ERROR_INT_ENABLE, 1);
5358                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5359                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5360                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5361                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5362                 break;
5363         default:
5364                 break;
5365         }
5366
5367         return 0;
5368 }
5369
5370
5371 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5372                                             struct amdgpu_irq_src *src,
5373                                             unsigned type,
5374                                             enum amdgpu_interrupt_state state)
5375 {
5376         switch (type) {
5377         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5378                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5379                 break;
5380         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5381                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5382                 break;
5383         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5384                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5385                 break;
5386         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5387                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5388                 break;
5389         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5390                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5391                 break;
5392         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5393                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5394                 break;
5395         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5396                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5397                 break;
5398         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5399                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5400                 break;
5401         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5402                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5403                 break;
5404         default:
5405                 break;
5406         }
5407         return 0;
5408 }
5409
5410 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5411                             struct amdgpu_irq_src *source,
5412                             struct amdgpu_iv_entry *entry)
5413 {
5414         int i;
5415         u8 me_id, pipe_id, queue_id;
5416         struct amdgpu_ring *ring;
5417
5418         DRM_DEBUG("IH: CP EOP\n");
5419         me_id = (entry->ring_id & 0x0c) >> 2;
5420         pipe_id = (entry->ring_id & 0x03) >> 0;
5421         queue_id = (entry->ring_id & 0x70) >> 4;
5422
5423         switch (me_id) {
5424         case 0:
5425                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5426                 break;
5427         case 1:
5428         case 2:
5429                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5430                         ring = &adev->gfx.compute_ring[i];
5431                         /* Per-queue interrupt is supported for MEC starting from VI.
5432                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5433                          */
5434                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5435                                 amdgpu_fence_process(ring);
5436                 }
5437                 break;
5438         }
5439         return 0;
5440 }
5441
5442 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5443                            struct amdgpu_iv_entry *entry)
5444 {
5445         u8 me_id, pipe_id, queue_id;
5446         struct amdgpu_ring *ring;
5447         int i;
5448
5449         me_id = (entry->ring_id & 0x0c) >> 2;
5450         pipe_id = (entry->ring_id & 0x03) >> 0;
5451         queue_id = (entry->ring_id & 0x70) >> 4;
5452
5453         switch (me_id) {
5454         case 0:
5455                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5456                 break;
5457         case 1:
5458         case 2:
5459                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5460                         ring = &adev->gfx.compute_ring[i];
5461                         if (ring->me == me_id && ring->pipe == pipe_id &&
5462                             ring->queue == queue_id)
5463                                 drm_sched_fault(&ring->sched);
5464                 }
5465                 break;
5466         }
5467 }
5468
5469 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5470                                  struct amdgpu_irq_src *source,
5471                                  struct amdgpu_iv_entry *entry)
5472 {
5473         DRM_ERROR("Illegal register access in command stream\n");
5474         gfx_v9_0_fault(adev, entry);
5475         return 0;
5476 }
5477
5478 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5479                                   struct amdgpu_irq_src *source,
5480                                   struct amdgpu_iv_entry *entry)
5481 {
5482         DRM_ERROR("Illegal instruction in command stream\n");
5483         gfx_v9_0_fault(adev, entry);
5484         return 0;
5485 }
5486
5487
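/*
 * EDC/ECC counter map for the RAS error-count path: each entry names a
 * GFX sub-block, the register holding its error counters, and the SEC
 * (single-error corrected) and DED (double-error detected) fields.
 * Entries with zeroed DED fields only expose a single-error (SED)
 * count.
 */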
5488 static const struct ras_gfx_subblock_reg ras_subblock_regs[] = {
5489         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5490           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5491           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5492         },
5493         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5494           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5495           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5496         },
5497         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5498           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5499           0, 0
5500         },
5501         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5502           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5503           0, 0
5504         },
5505         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5506           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5507           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5508         },
5509         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5510           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5511           0, 0
5512         },
5513         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5514           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5515           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5516         },
5517         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5518           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5519           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5520         },
5521         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5522           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5523           0, 0
5524         },
5525         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5526           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5527           0, 0
5528         },
5529         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5530           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5531           0, 0
5532         },
5533         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5534           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5535           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5536         },
5537         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5538           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5539           0, 0
5540         },
5541         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5542           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5543           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5544         },
5545         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5546           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5547           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5548           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5549         },
5550         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5551           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5552           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5553           0, 0
5554         },
5555         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5556           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5557           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5558           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5559         },
5560         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5561           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5562           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5563           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5564         },
5565         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5566           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5567           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5568           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5569         },
5570         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5571           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5572           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5573           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5574         },
5575         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5576           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5577           0, 0
5578         },
5579         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5580           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5581           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5582         },
5583         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5584           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5585           0, 0
5586         },
5587         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5588           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5589           0, 0
5590         },
5591         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5592           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5593           0, 0
5594         },
5595         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5596           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5597           0, 0
5598         },
5599         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5600           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5601           0, 0
5602         },
5603         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5604           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5605           0, 0
5606         },
5607         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5608           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5609           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5610         },
5611         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5612           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5613           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5614         },
5615         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5616           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5617           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5618         },
5619         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5620           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5621           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5622         },
5623         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5624           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5625           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5626         },
5627         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5628           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5629           0, 0
5630         },
5631         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5632           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5633           0, 0
5634         },
5635         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5636           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5637           0, 0
5638         },
5639         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5640           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5641           0, 0
5642         },
5643         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5644           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5645           0, 0
5646         },
5647         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5648           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5649           0, 0
5650         },
5651         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5652           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5653           0, 0
5654         },
5655         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5656           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5657           0, 0
5658         },
5659         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5660           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5661           0, 0
5662         },
5663         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5664           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5665           0, 0
5666         },
5667         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5668           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5669           0, 0
5670         },
5671         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5672           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5673           0, 0
5674         },
5675         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5676           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5677           0, 0
5678         },
5679         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5680           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5681           0, 0
5682         },
5683         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5684           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5685           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5686         },
5687         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5688           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5689           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5690         },
5691         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5692           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5693           0, 0
5694         },
5695         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5696           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5697           0, 0
5698         },
5699         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5700           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5701           0, 0
5702         },
5703         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5704           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5705           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5706         },
5707         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5708           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5709           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5710         },
5711         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5712           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5713           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5714         },
5715         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5716           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5717           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5718         },
5719         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5720           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5721           0, 0
5722         },
5723         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5724           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5725           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5726         },
5727         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5728           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5729           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5730         },
5731         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5732           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5733           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5734         },
5735         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5736           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5737           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5738         },
5739         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5740           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5741           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5742         },
5743         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5744           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5745           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5746         },
5747         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5748           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5749           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5750         },
5751         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5752           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5753           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5754         },
5755         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5756           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5757           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5758         },
5759         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5760           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5761           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5762         },
5763         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5764           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5765           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5766         },
5767         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5768           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5769           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5770         },
5771         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5772           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5773           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5774         },
5775         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5776           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5777           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5778         },
5779         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5780           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5781           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5782         },
5783         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5784           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5785           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5786         },
5787         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5788           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5789           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5790         },
5791         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5792           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5793           0, 0
5794         },
5795         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5796           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5797           0, 0
5798         },
5799         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5800           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5801           0, 0
5802         },
5803         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5804           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5805           0, 0
5806         },
5807         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5808           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5809           0, 0
5810         },
5811         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5812           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5813           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5814         },
5815         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5816           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5817           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5818         },
5819         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5820           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5821           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5822         },
5823         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5824           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5825           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5826         },
5827         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5828           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5829           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5830         },
5831         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5832           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5833           0, 0
5834         },
5835         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5836           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5837           0, 0
5838         },
5839         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5840           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5841           0, 0
5842         },
5843         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5844           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5845           0, 0
5846         },
5847         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5848           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5849           0, 0
5850         },
5851         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5852           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5853           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5854         },
5855         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5856           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5857           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5858         },
5859         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5860           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5861           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5862         },
5863         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5864           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5865           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5866         },
5867         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5868           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5869           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5870         },
5871         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5872           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5873           0, 0
5874         },
5875         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5876           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5877           0, 0
5878         },
5879         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5880           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5881           0, 0
5882         },
5883         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5884           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5885           0, 0
5886         },
5887         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5888           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5889           0, 0
5890         },
5891         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5892           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5893           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5894         },
5895         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5896           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5897           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5898         },
5899         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5900           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5901           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5902         },
5903         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5904           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5905           0, 0
5906         },
5907         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5908           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5909           0, 0
5910         },
5911         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5912           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5913           0, 0
5914         },
5915         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5916           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5917           0, 0
5918         },
5919         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5920           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5921           0, 0
5922         },
5923         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5924           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5925           0, 0
5926         }
5927 };
5928
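/*
 * Inject a RAS error through the PSP RAS TA.  Only Vega20 supports
 * this.  The requested sub-block index is validated against the
 * ras_gfx_subblocks table (defined earlier in this file) and against
 * the error types both the hardware and the driver support, before the
 * request is handed to psp_ras_trigger_error() under grbm_idx_mutex.
 */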
5929 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5930                                      void *inject_if)
5931 {
5932         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5933         int ret;
5934         struct ta_ras_trigger_error_input block_info = { 0 };
5935
5936         if (adev->asic_type != CHIP_VEGA20)
5937                 return -EINVAL;
5938
5939         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5940                 return -EINVAL;
5941
5942         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5943                 return -EPERM;
5944
5945         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5946               info->head.type)) {
5947                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5948                         ras_gfx_subblocks[info->head.sub_block_index].name,
5949                         info->head.type);
5950                 return -EPERM;
5951         }
5952
5953         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5954               info->head.type)) {
5955                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5956                         ras_gfx_subblocks[info->head.sub_block_index].name,
5957                         info->head.type);
5958                 return -EPERM;
5959         }
5960
5961         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5962         block_info.sub_block_index =
5963                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5964         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5965         block_info.address = info->address;
5966         block_info.value = info->value;
5967
5968         mutex_lock(&adev->grbm_idx_mutex);
5969         ret = psp_ras_trigger_error(&adev->psp, &block_info);
5970         mutex_unlock(&adev->grbm_idx_mutex);
5971
5972         return ret;
5973 }
5974
static const char *vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};

static const char *vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};

static const char *atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};

static const char *atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};

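/*
 * gfx_v9_0_query_utc_edc_status - collect UTC EDC error counts
 *
 * Reads the per-instance EDC counters of the VML2 bank caches, the
 * VML2 page-walker memories and the ATC L2 2M/4K caches, logs any
 * non-zero counts and accumulates them into err_data.
 */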
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
{
	uint32_t i, data;
	uint32_t sec_count, ded_count;

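	/*
	 * Write index 255 and zero each counter register up front,
	 * presumably resetting the counts for every instance before
	 * they are sampled one by one below.
	 */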
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  SEC_COUNT);
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
					  DED_COUNT);
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

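		/*
		 * Raw mask/shift: bits [14:13] appear to hold the SEC
		 * count of ATC_L2_CACHE_2M_EDC_CNT; no register field
		 * macro is used here in the original code.
		 */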
		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 atc_l2_cache_2m_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

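		/*
		 * Raw masks/shifts: bits [14:13] appear to hold the SEC
		 * count and bits [16:15] the DED count of
		 * ATC_L2_CACHE_4K_EDC_CNT.
		 */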
		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
				 atc_l2_cache_4k_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = (data & 0x00018000L) >> 0xf;
		if (ded_count) {
			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
				 atc_l2_cache_4k_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

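	/* Leave the index registers at 255 again once all instances are read. */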
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

	return 0;
}

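/*
 * __get_ras_error_count - decode SEC/DED counts from one counter register
 *
 * Looks the register up in ras_subblock_regs, extracts the SEC and DED
 * bit-fields from the raw register value and adds them to the running
 * totals.
 */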
static int __get_ras_error_count(const struct soc15_reg_entry *reg,
	uint32_t se_id, uint32_t inst_id, uint32_t value,
	uint32_t *sec_count, uint32_t *ded_count)
{
	uint32_t i;
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) {
		if (ras_subblock_regs[i].reg_offset != reg->reg_offset ||
		    ras_subblock_regs[i].seg != reg->seg ||
		    ras_subblock_regs[i].inst != reg->inst)
			continue;

		sec_cnt = (value &
				ras_subblock_regs[i].sec_count_mask) >>
				ras_subblock_regs[i].sec_count_shift;
		if (sec_cnt) {
			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
				ras_subblock_regs[i].name,
				se_id, inst_id,
				sec_cnt);
			*sec_count += sec_cnt;
		}

		ded_cnt = (value &
				ras_subblock_regs[i].ded_count_mask) >>
				ras_subblock_regs[i].ded_count_shift;
		if (ded_cnt) {
			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
				ras_subblock_regs[i].name,
				se_id, inst_id,
				ded_cnt);
			*ded_count += ded_cnt;
		}
	}

	return 0;
}

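/*
 * gfx_v9_0_query_ras_error_count - tally correctable/uncorrectable errors
 *
 * Walks every SEC/DED counter register across all shader engines and
 * instances, decodes the counts and reports them through
 * ras_error_status. Only Vega20 is supported here.
 */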
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;

	if (adev->asic_type != CHIP_VEGA20)
		return -EINVAL;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
				if (reg_value)
					__get_ras_error_count(&sec_ded_counter_registers[i],
							j, k, reg_value,
							&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

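	/* Restore broadcast selection of all SEs/SHs/instances. */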
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);

	return 0;
}

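/* IP-block level callbacks for the gfx v9.0 block. */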
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 242 dwords maximum with 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jumps to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

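/*
 * Mark user-disabled CUs inactive for the currently selected SE/SH by
 * programming GC_USER_SHADER_ARRAY_CONFIG.
 */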
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

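/*
 * A CU counts as active when it is disabled in neither the fused
 * config (CC_GC_SHADER_ARRAY_CONFIG) nor the user config
 * (GC_USER_SHADER_ARRAY_CONFIG).
 */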
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

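/*
 * gfx_v9_0_get_cu_info - gather per-SE/SH compute-unit bitmaps
 *
 * Fills cu_info with the active-CU bitmap, the always-on CU mask and
 * the total number of active CUs across all shader engines.
 */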
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the bitmap array size 4*4, which covers all
	 * gfx9 ASICs.
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which suits Vega ASICs
			 * with their 4*2 SE/SH layout.
			 * Arcturus, however, uses an 8*1 SE/SH layout.
			 * To minimize the impact, it is folded into the
			 * existing 4x4 array as follows:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};