[linux.git] drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v12_0.h"
34 #include "soc24.h"
35 #include "nvd.h"
36
37 #include "gc/gc_12_0_0_offset.h"
38 #include "gc/gc_12_0_0_sh_mask.h"
39 #include "soc24_enum.h"
40 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
41
42 #include "soc15.h"
43 #include "soc15d.h"
44 #include "clearstate_gfx12.h"
45 #include "v12_structs.h"
46 #include "gfx_v12_0.h"
47 #include "nbif_v6_3_1.h"
48 #include "mes_v12_0.h"
49
50 #define GFX12_NUM_GFX_RINGS     1
51 #define GFX12_MEC_HPD_SIZE      2048
52
53 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
54
55 MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
56 MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
57 MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
58 MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
59 MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
60 MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
61 MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
62 MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
63 MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
64 MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
65
66 static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
67         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
68         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
69         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
70         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
71         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
72         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
73         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
74         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
75         SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
76         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
77         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
78         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
79         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
80         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
81         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
82         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
83         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
84         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
85         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
86         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
87         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
88         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
89         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
90         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
91         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
92         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
93         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
94         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
95         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
96         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
97         SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
98         SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
99         SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
100         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
101         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
102         SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
103         SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
104         SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
105         SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
106         SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
107         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
108         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
109         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
110         SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
111         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
112         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
113         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
114         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
115         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
116         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
117         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
118         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
119         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
120
121         /* cp header registers */
122         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
123         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
124         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
125         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
126         /* SE status registers */
127         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
128         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
129         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
130         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
131 };
132
133 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
134         /* compute registers */
135         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
136         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
137         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
138         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
139         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
140         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
141         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
142         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
143         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
144         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
145         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
146         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
147         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
148         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
149         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
150         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
151         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
152         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
153         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
154         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
155         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
156         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
157         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
158         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
159         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
160         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
161         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
162         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
163         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
164         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
165         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
166         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
167         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
168         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
169         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
170         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
171         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
172         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
173         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
174 };
175
176 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
177         /* gfx queue registers */
178         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
179         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
180         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
181         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
182         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
183         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
184         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
185         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
186         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
187         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
188         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
189         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
190         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
191         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
192         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
193         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
194         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
195         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
196         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
197         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
198         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
199         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
200         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
201         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
202         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
203 };
204
205 static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
206         SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
209 };
210
211 #define DEFAULT_SH_MEM_CONFIG \
212         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
213          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
214          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
215
216 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
217 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
218 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
219 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
220 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
221 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
222 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
223                                  struct amdgpu_cu_info *cu_info);
224 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
225 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
226                                    u32 sh_num, u32 instance, int xcc_id);
227 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
228
229 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
230 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
231                                      uint32_t val);
232 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
233 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
234                                            uint16_t pasid, uint32_t flush_type,
235                                            bool all_hub, uint8_t dst_sel);
236 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
237 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
238 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
239                                       bool enable);
240
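/*
 * Hand ownership of the queues in @queue_mask over to the KIQ with a
 * PACKET3_SET_RESOURCES packet; the GWS and OAC masks are left at zero.
 */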
241 static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
242                                         uint64_t queue_mask)
243 {
244         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
245         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
246                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
247         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
248         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
249         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
250         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
251         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
252         amdgpu_ring_write(kiq_ring, 0);
253 }
254
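/*
 * Map @ring's hardware queue through the KIQ: the ME/engine select is
 * derived from the ring type (compute, gfx or MES) and the queue is
 * described by its doorbell offset, MQD address and wptr address.
 */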
255 static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
256                                      struct amdgpu_ring *ring)
257 {
258         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
259         uint64_t wptr_addr = ring->wptr_gpu_addr;
260         uint32_t me = 0, eng_sel = 0;
261
262         switch (ring->funcs->type) {
263         case AMDGPU_RING_TYPE_COMPUTE:
264                 me = 1;
265                 eng_sel = 0;
266                 break;
267         case AMDGPU_RING_TYPE_GFX:
268                 me = 0;
269                 eng_sel = 4;
270                 break;
271         case AMDGPU_RING_TYPE_MES:
272                 me = 2;
273                 eng_sel = 5;
274                 break;
275         default:
276                 WARN_ON(1);
277         }
278
279         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
280         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
281         amdgpu_ring_write(kiq_ring,
282                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
283                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
284                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
285                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
286                           PACKET3_MAP_QUEUES_ME((me)) |
287                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
288                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
289                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
290                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
291         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
292         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
293         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
294         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
295         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
296 }
297
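/*
 * Unmap @ring's hardware queue through the KIQ, or hand the request to the
 * MES when MES is enabled and the KIQ ring is not ready.  For
 * PREEMPT_QUEUES_NO_UNMAP the trailing dwords carry the fence address and
 * sequence number to signal on completion.
 */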
298 static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
299                                        struct amdgpu_ring *ring,
300                                        enum amdgpu_unmap_queues_action action,
301                                        u64 gpu_addr, u64 seq)
302 {
303         struct amdgpu_device *adev = kiq_ring->adev;
304         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
305
306         if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
307                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
308                 return;
309         }
310
311         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
312         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
313                           PACKET3_UNMAP_QUEUES_ACTION(action) |
314                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
315                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
316                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
317         amdgpu_ring_write(kiq_ring,
318                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
319
320         if (action == PREEMPT_QUEUES_NO_UNMAP) {
321                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
322                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
323                 amdgpu_ring_write(kiq_ring, seq);
324         } else {
325                 amdgpu_ring_write(kiq_ring, 0);
326                 amdgpu_ring_write(kiq_ring, 0);
327                 amdgpu_ring_write(kiq_ring, 0);
328         }
329 }
330
331 static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
332                                        struct amdgpu_ring *ring,
333                                        u64 addr, u64 seq)
334 {
335         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
336
337         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
338         amdgpu_ring_write(kiq_ring,
339                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
340                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
341                           PACKET3_QUERY_STATUS_COMMAND(2));
342         amdgpu_ring_write(kiq_ring, /* doorbell offset, engine select */
343                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
344                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
345         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
346         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
347         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
348         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
349 }
350
351 static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
352                                           uint16_t pasid,
353                                           uint32_t flush_type,
354                                           bool all_hub)
355 {
356         gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
357 }
358
359 static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
360         .kiq_set_resources = gfx_v12_0_kiq_set_resources,
361         .kiq_map_queues = gfx_v12_0_kiq_map_queues,
362         .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
363         .kiq_query_status = gfx_v12_0_kiq_query_status,
364         .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
365         .set_resources_size = 8,
366         .map_queues_size = 7,
367         .unmap_queues_size = 6,
368         .query_status_size = 7,
369         .invalidate_tlbs_size = 2,
370 };
371
372 static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
373 {
374         adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
375 }
376
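/*
 * Emit a WAIT_REG_MEM packet that polls either a register or a memory
 * location (selected by @mem_space) until (value & @mask) == @ref,
 * re-checking every @inv poll interval.
 */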
377 static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
378                                    int mem_space, int opt, uint32_t addr0,
379                                    uint32_t addr1, uint32_t ref,
380                                    uint32_t mask, uint32_t inv)
381 {
382         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
383         amdgpu_ring_write(ring,
384                           /* memory (1) or register (0) */
385                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
386                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
387                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
388                            WAIT_REG_MEM_ENGINE(eng_sel)));
389
390         if (mem_space)
391                 BUG_ON(addr0 & 0x3); /* Dword align */
392         amdgpu_ring_write(ring, addr0);
393         amdgpu_ring_write(ring, addr1);
394         amdgpu_ring_write(ring, ref);
395         amdgpu_ring_write(ring, mask);
396         amdgpu_ring_write(ring, inv); /* poll interval */
397 }
398
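/*
 * Basic ring test: write 0xCAFEDEAD to SCRATCH_REG0, submit a packet on
 * @ring that overwrites it with 0xDEADBEEF, then poll the register until
 * the new value appears or the timeout expires.
 */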
399 static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
400 {
401         struct amdgpu_device *adev = ring->adev;
402         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
403         uint32_t tmp = 0;
404         unsigned i;
405         int r;
406
407         WREG32(scratch, 0xCAFEDEAD);
408         r = amdgpu_ring_alloc(ring, 5);
409         if (r) {
410                 dev_err(adev->dev,
411                         "amdgpu: cp failed to lock ring %d (%d).\n",
412                         ring->idx, r);
413                 return r;
414         }
415
416         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
417                 gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
418         } else {
419                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
420                 amdgpu_ring_write(ring, scratch -
421                                   PACKET3_SET_UCONFIG_REG_START);
422                 amdgpu_ring_write(ring, 0xDEADBEEF);
423         }
424         amdgpu_ring_commit(ring);
425
426         for (i = 0; i < adev->usec_timeout; i++) {
427                 tmp = RREG32(scratch);
428                 if (tmp == 0xDEADBEEF)
429                         break;
430                 if (amdgpu_emu_mode == 1)
431                         msleep(1);
432                 else
433                         udelay(1);
434         }
435
436         if (i >= adev->usec_timeout)
437                 r = -ETIMEDOUT;
438         return r;
439 }
440
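/*
 * Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * writeback slot (or to the MES context padding area for MES queues),
 * then wait for the fence and verify the value landed.
 */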
441 static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443         struct amdgpu_device *adev = ring->adev;
444         struct amdgpu_ib ib;
445         struct dma_fence *f = NULL;
446         unsigned index;
447         uint64_t gpu_addr;
448         volatile uint32_t *cpu_ptr;
449         long r;
450
451         /* MES KIQ fw doesn't support indirect buffers for now */
452         if (adev->enable_mes_kiq &&
453             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
454                 return 0;
455
456         memset(&ib, 0, sizeof(ib));
457
458         if (ring->is_mes_queue) {
459                 uint32_t padding, offset;
460
461                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
462                 padding = amdgpu_mes_ctx_get_offs(ring,
463                                                   AMDGPU_MES_CTX_PADDING_OFFS);
464
465                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
466                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
467
468                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
469                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
470                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
471         } else {
472                 r = amdgpu_device_wb_get(adev, &index);
473                 if (r)
474                         return r;
475
476                 gpu_addr = adev->wb.gpu_addr + (index * 4);
477                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
478                 cpu_ptr = &adev->wb.wb[index];
479
480                 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
481                 if (r) {
482                         dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
483                         goto err1;
484                 }
485         }
486
487         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
488         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
489         ib.ptr[2] = lower_32_bits(gpu_addr);
490         ib.ptr[3] = upper_32_bits(gpu_addr);
491         ib.ptr[4] = 0xDEADBEEF;
492         ib.length_dw = 5;
493
494         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
495         if (r)
496                 goto err2;
497
498         r = dma_fence_wait_timeout(f, false, timeout);
499         if (r == 0) {
500                 r = -ETIMEDOUT;
501                 goto err2;
502         } else if (r < 0) {
503                 goto err2;
504         }
505
506         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
507                 r = 0;
508         else
509                 r = -EINVAL;
510 err2:
511         if (!ring->is_mes_queue)
512                 amdgpu_ib_free(adev, &ib, NULL);
513         dma_fence_put(f);
514 err1:
515         if (!ring->is_mes_queue)
516                 amdgpu_device_wb_free(adev, index);
517         return r;
518 }
519
520 static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
521 {
522         amdgpu_ucode_release(&adev->gfx.pfp_fw);
523         amdgpu_ucode_release(&adev->gfx.me_fw);
524         amdgpu_ucode_release(&adev->gfx.rlc_fw);
525         amdgpu_ucode_release(&adev->gfx.mec_fw);
526
527         kfree(adev->gfx.rlc.register_list_format);
528 }
529
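/*
 * Load the TOC firmware image and record its version, size and start
 * address in adev->psp.toc for the RLC backdoor autoload path.
 */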
530 static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
531 {
532         const struct psp_firmware_header_v1_0 *toc_hdr;
533         int err = 0;
534
535         err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
536                                    "amdgpu/%s_toc.bin", ucode_prefix);
537         if (err)
538                 goto out;
539
540         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
541         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
542         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
543         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
544         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
545                         le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
546         return 0;
547 out:
548         amdgpu_ucode_release(&adev->psp.toc_fw);
549         return err;
550 }
551
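/*
 * Request the PFP, ME, MEC and (on bare metal) RLC firmware images,
 * register their RS64 ucode and stack entries, pull in the TOC when RLC
 * backdoor autoload is used, and initialize the IMU microcode if present.
 * All requested images are released again on error.
 */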
552 static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
553 {
554         char ucode_prefix[15];
555         int err;
556         const struct rlc_firmware_header_v2_0 *rlc_hdr;
557         uint16_t version_major;
558         uint16_t version_minor;
559
560         DRM_DEBUG("\n");
561
562         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
563
564         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
565                                    "amdgpu/%s_pfp.bin", ucode_prefix);
566         if (err)
567                 goto out;
568         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
569         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
570
571         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
572                                    "amdgpu/%s_me.bin", ucode_prefix);
573         if (err)
574                 goto out;
575         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
576         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
577
578         if (!amdgpu_sriov_vf(adev)) {
579                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
580                                            "amdgpu/%s_rlc.bin", ucode_prefix);
581                 if (err)
582                         goto out;
583                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
584                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
585                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
586                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
587                 if (err)
588                         goto out;
589         }
590
591         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
592                                    "amdgpu/%s_mec.bin", ucode_prefix);
593         if (err)
594                 goto out;
595         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
596         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
597         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
598
599         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
600                 err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
601
602         /* only one MEC for gfx 12 */
603         adev->gfx.mec2_fw = NULL;
604
605         if (adev->gfx.imu.funcs) {
606                 if (adev->gfx.imu.funcs->init_microcode) {
607                         err = adev->gfx.imu.funcs->init_microcode(adev);
608                         if (err)
609                                 dev_err(adev->dev, "Failed to load imu firmware!\n");
610                 }
611         }
612
613 out:
614         if (err) {
615                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
616                 amdgpu_ucode_release(&adev->gfx.me_fw);
617                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
618                 amdgpu_ucode_release(&adev->gfx.mec_fw);
619         }
620
621         return err;
622 }
623
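/*
 * Return the size of the clear-state buffer in dwords: one header dword
 * plus (2 + reg_count) dwords for every SECT_CONTEXT extent in
 * gfx12_cs_data.
 */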
624 static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
625 {
626         u32 count = 0;
627         const struct cs_section_def *sect = NULL;
628         const struct cs_extent_def *ext = NULL;
629
630         count += 1;
631
632         for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
633                 if (sect->id == SECT_CONTEXT) {
634                         for (ext = sect->section; ext->extent != NULL; ++ext)
635                                 count += 2 + ext->reg_count;
636                 } else
637                         return 0;
638         }
639
640         return count;
641 }
642
643 static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
644                                      volatile u32 *buffer)
645 {
646         u32 count = 0, clustercount = 0, i;
647         const struct cs_section_def *sect = NULL;
648         const struct cs_extent_def *ext = NULL;
649
650         if (adev->gfx.rlc.cs_data == NULL)
651                 return;
652         if (buffer == NULL)
653                 return;
654
655         count += 1;
656
657         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
658                 if (sect->id == SECT_CONTEXT) {
659                         for (ext = sect->section; ext->extent != NULL; ++ext) {
660                                 clustercount++;
661                                 buffer[count++] = ext->reg_count;
662                                 buffer[count++] = ext->reg_index;
663
664                                 for (i = 0; i < ext->reg_count; i++)
665                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
666                         }
667                 } else
668                         return;
669         }
670
671         buffer[0] = clustercount;
672 }
673
674 static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
675 {
676         /* clear state block */
677         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
678                         &adev->gfx.rlc.clear_state_gpu_addr,
679                         (void **)&adev->gfx.rlc.cs_ptr);
680
681         /* jump table block */
682         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
683                         &adev->gfx.rlc.cp_table_gpu_addr,
684                         (void **)&adev->gfx.rlc.cp_table_ptr);
685 }
686
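/*
 * Record the scratch, GRBM control/index and RLC spare interrupt register
 * offsets used for RLCG-assisted register access and mark the interface
 * as supported.
 */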
687 static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
688 {
689         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
690
691         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
692         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
693         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
694         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
695         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
696         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
697         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
698         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
699         adev->gfx.rlc.rlcg_reg_access_supported = true;
700 }
701
702 static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
703 {
704         const struct cs_section_def *cs_data;
705         int r;
706
707         adev->gfx.rlc.cs_data = gfx12_cs_data;
708
709         cs_data = adev->gfx.rlc.cs_data;
710
711         if (cs_data) {
712                 /* init clear state block */
713                 r = amdgpu_gfx_rlc_init_csb(adev);
714                 if (r)
715                         return r;
716         }
717
718         /* init spm vmid with 0xf */
719         if (adev->gfx.rlc.funcs->update_spm_vmid)
720                 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
721
722         return 0;
723 }
724
725 static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
726 {
727         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
728         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
729         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
730 }
731
732 static void gfx_v12_0_me_init(struct amdgpu_device *adev)
733 {
734         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
735
736         amdgpu_gfx_graphics_queue_acquire(adev);
737 }
738
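/*
 * Take ownership of the compute queues this driver will use and allocate
 * one GFX12_MEC_HPD_SIZE HPD/EOP slot per compute ring in a GTT buffer,
 * cleared to zero.
 */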
739 static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
740 {
741         int r;
742         u32 *hpd;
743         size_t mec_hpd_size;
744
745         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
746
747         /* take ownership of the relevant compute queues */
748         amdgpu_gfx_compute_queue_acquire(adev);
749         mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
750
751         if (mec_hpd_size) {
752                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
753                                               AMDGPU_GEM_DOMAIN_GTT,
754                                               &adev->gfx.mec.hpd_eop_obj,
755                                               &adev->gfx.mec.hpd_eop_gpu_addr,
756                                               (void **)&hpd);
757                 if (r) {
758                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
759                         gfx_v12_0_mec_fini(adev);
760                         return r;
761                 }
762
763                 memset(hpd, 0, mec_hpd_size);
764
765                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
766                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
767         }
768
769         return 0;
770 }
771
772 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
773 {
774         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
775                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
776                 (address << SQ_IND_INDEX__INDEX__SHIFT));
777         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
778 }
779
780 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
781                            uint32_t thread, uint32_t regno,
782                            uint32_t num, uint32_t *out)
783 {
784         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
785                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
786                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
787                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
788                 (SQ_IND_INDEX__AUTO_INCR_MASK));
789         while (num--)
790                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
791 }
792
793 static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
794                                      uint32_t xcc_id,
795                                      uint32_t simd, uint32_t wave,
796                                      uint32_t *dst, int *no_fields)
797 {
798         /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
799          * field when performing a select_se_sh so it should be
800          * zero here */
801         WARN_ON(simd != 0);
802
803         /* type 4 wave data */
804         dst[(*no_fields)++] = 4;
805         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
806         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
807         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
808         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
809         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
810         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
811         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
812         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
813         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
814         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
815         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
816         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
817         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
818         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
819         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
820         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
821         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
822         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
823         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
824         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
825         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
826         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
827         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
828 }
829
830 static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
831                                       uint32_t xcc_id, uint32_t simd,
832                                       uint32_t wave, uint32_t start,
833                                       uint32_t size, uint32_t *dst)
834 {
835         WARN_ON(simd != 0);
836
837         wave_read_regs(
838                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
839                 dst);
840 }
841
842 static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
843                                       uint32_t xcc_id, uint32_t simd,
844                                       uint32_t wave, uint32_t thread,
845                                       uint32_t start, uint32_t size,
846                                       uint32_t *dst)
847 {
848         wave_read_regs(
849                 adev, wave, thread,
850                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
851 }
852
853 static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
854                                        u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
855 {
856         soc24_grbm_select(adev, me, pipe, q, vm);
857 }
858
859 static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
860         .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
861         .select_se_sh = &gfx_v12_0_select_se_sh,
862         .read_wave_data = &gfx_v12_0_read_wave_data,
863         .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
864         .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
865         .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
866         .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
867 };
868
869 static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
870 {
871
872         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
873         case IP_VERSION(12, 0, 0):
874         case IP_VERSION(12, 0, 1):
875                 adev->gfx.config.max_hw_contexts = 8;
876                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
877                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
878                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
879                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
880                 break;
881         default:
882                 BUG();
883                 break;
884         }
885
886         return 0;
887 }
888
889 static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
890                                    int me, int pipe, int queue)
891 {
892         int r;
893         struct amdgpu_ring *ring;
894         unsigned int irq_type;
895
896         ring = &adev->gfx.gfx_ring[ring_id];
897
898         ring->me = me;
899         ring->pipe = pipe;
900         ring->queue = queue;
901
902         ring->ring_obj = NULL;
903         ring->use_doorbell = true;
904
905         if (!ring_id)
906                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
907         else
908                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
909         ring->vm_hub = AMDGPU_GFXHUB(0);
910         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
911
912         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
913         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
914                              AMDGPU_RING_PRIO_DEFAULT, NULL);
915         if (r)
916                 return r;
917         return 0;
918 }
919
920 static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
921                                        int mec, int pipe, int queue)
922 {
923         int r;
924         unsigned irq_type;
925         struct amdgpu_ring *ring;
926         unsigned int hw_prio;
927
928         ring = &adev->gfx.compute_ring[ring_id];
929
930         /* mec0 is me1 */
931         ring->me = mec + 1;
932         ring->pipe = pipe;
933         ring->queue = queue;
934
935         ring->ring_obj = NULL;
936         ring->use_doorbell = true;
937         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
938         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
939                                 + (ring_id * GFX12_MEC_HPD_SIZE);
940         ring->vm_hub = AMDGPU_GFXHUB(0);
941         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
942
943         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
944                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
945                 + ring->pipe;
946         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
947                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
948         /* type-2 packets are deprecated on MEC, use type-3 instead */
949         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
950                              hw_prio, NULL);
951         if (r)
952                 return r;
953
954         return 0;
955 }
956
957 static struct {
958         SOC24_FIRMWARE_ID       id;
959         unsigned int            offset;
960         unsigned int            size;
961         unsigned int            size_x16;
962 } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
963
964 #define RLC_TOC_OFFSET_DWUNIT   8
965 #define RLC_SIZE_MULTIPLE       1024
966 #define RLC_TOC_UMF_SIZE_inM    23ULL
967 #define RLC_TOC_FORMAT_API      165ULL
968
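/*
 * Walk the RLC table of contents and cache each firmware id's byte offset
 * and size in rlc_autoload_info[]; sizes flagged with size_x16 are scaled
 * by RLC_SIZE_MULTIPLE.
 */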
969 static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
970 {
971         RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
972
973         while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
974                 rlc_autoload_info[ucode->id].id = ucode->id;
975                 rlc_autoload_info[ucode->id].offset =
976                         ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
977                 rlc_autoload_info[ucode->id].size =
978                         ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
979                                           ucode->size * 4;
980                 ucode++;
981         }
982 }
983
984 static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
985 {
986         uint32_t total_size = 0;
987         SOC24_FIRMWARE_ID id;
988
989         gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
990
991         for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
992                 total_size += rlc_autoload_info[id].size;
993
994         /* In case the offsets in the rlc toc are padded for alignment */
995         if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
996                 total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
997                         rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
998         if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
999                 total_size = RLC_TOC_UMF_SIZE_inM << 20;
1000
1001         return total_size;
1002 }
1003
1004 static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1005 {
1006         int r;
1007         uint32_t total_size;
1008
1009         total_size = gfx_v12_0_calc_toc_total_size(adev);
1010
1011         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1012                                       AMDGPU_GEM_DOMAIN_VRAM,
1013                                       &adev->gfx.rlc.rlc_autoload_bo,
1014                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1015                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1016
1017         if (r) {
1018                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1019                 return r;
1020         }
1021
1022         return 0;
1023 }
1024
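/*
 * Copy one firmware image into its TOC-assigned slot inside the autoload
 * buffer, clamping to the slot size and zero-filling any remainder.
 */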
1025 static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1026                                                        SOC24_FIRMWARE_ID id,
1027                                                        const void *fw_data,
1028                                                        uint32_t fw_size)
1029 {
1030         uint32_t toc_offset;
1031         uint32_t toc_fw_size;
1032         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1033
1034         if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
1035                 return;
1036
1037         toc_offset = rlc_autoload_info[id].offset;
1038         toc_fw_size = rlc_autoload_info[id].size;
1039
1040         if (fw_size == 0)
1041                 fw_size = toc_fw_size;
1042
1043         if (fw_size > toc_fw_size)
1044                 fw_size = toc_fw_size;
1045
1046         memcpy(ptr + toc_offset, fw_data, fw_size);
1047
1048         if (fw_size < toc_fw_size)
1049                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1050 }
1051
1052 static void
1053 gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
1054 {
1055         void *data;
1056         uint32_t size;
1057         uint32_t *toc_ptr;
1058
1059         data = adev->psp.toc.start_addr;
1060         size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
1061
1062         toc_ptr = (uint32_t *)data + size / 4 - 2;
1063         *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
1064
1065         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
1066                                                    data, size);
1067 }
1068
1069 static void
1070 gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
1071 {
1072         const __le32 *fw_data;
1073         uint32_t fw_size;
1074         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1075         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1076         const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
1077         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1078         uint16_t version_major, version_minor;
1079
1080         /* pfp ucode */
1081         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1082                 adev->gfx.pfp_fw->data;
1083         /* instruction */
1084         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1085                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1086         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1087         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
1088                                                    fw_data, fw_size);
1089         /* data */
1090         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1091                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1092         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1093         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
1094                                                    fw_data, fw_size);
1095         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
1096                                                    fw_data, fw_size);
1097         /* me ucode */
1098         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1099                 adev->gfx.me_fw->data;
1100         /* instruction */
1101         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1102                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1103         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1104         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
1105                                                    fw_data, fw_size);
1106         /* data */
1107         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1108                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1109         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1110         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
1111                                                    fw_data, fw_size);
1112         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
1113                                                    fw_data, fw_size);
1114         /* mec ucode */
1115         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1116                 adev->gfx.mec_fw->data;
1117         /* instruction */
1118         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1119                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1120         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1121         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
1122                                                    fw_data, fw_size);
1123         /* data */
1124         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1125                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1126         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1127         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
1128                                                    fw_data, fw_size);
1129         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
1130                                                    fw_data, fw_size);
1131         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
1132                                                    fw_data, fw_size);
1133         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
1134                                                    fw_data, fw_size);
1135
1136         /* rlc ucode */
1137         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1138                 adev->gfx.rlc_fw->data;
1139         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1140                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1141         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1142         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
1143                                                    fw_data, fw_size);
1144
1145         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1146         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1147         if (version_major == 2) {
1148                 if (version_minor >= 1) {
1149                         rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1150
1151                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1152                                         le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
1153                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
1154                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
1155                                                    fw_data, fw_size);
1156
1157                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1158                                         le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
1159                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
1160                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
1161                                                    fw_data, fw_size);
1162                 }
1163                 if (version_minor >= 2) {
1164                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1165
1166                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1167                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1168                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1169                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
1170                                                    fw_data, fw_size);
1171
1172                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1173                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1174                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1175                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
1176                                                    fw_data, fw_size);
1177                 }
1178         }
1179 }
1180
1181 static void
1182 gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
1183 {
1184         const __le32 *fw_data;
1185         uint32_t fw_size;
1186         const struct sdma_firmware_header_v3_0 *sdma_hdr;
1187
1188         sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
1189                 adev->sdma.instance[0].fw->data;
1190         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1191                         le32_to_cpu(sdma_hdr->ucode_offset_bytes));
1192         fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
1193
1194         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
1195                                                    fw_data, fw_size);
1196 }
1197
1198 static void
1199 gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
1200 {
1201         const __le32 *fw_data;
1202         unsigned fw_size;
1203         const struct mes_firmware_header_v1_0 *mes_hdr;
1204         int pipe, ucode_id, data_id;
1205
1206         for (pipe = 0; pipe < 2; pipe++) {
1207                 if (pipe == 0) {
1208                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
1209                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
1210                 } else {
1211                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
1212                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
1213                 }
1214
1215                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1216                         adev->mes.fw[pipe]->data;
1217
1218                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1219                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1220                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1221
1222                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
1223
1224                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1225                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1226                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1227
1228                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
1229         }
1230 }
1231
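/*
 * Kick off RLC backdoor autoload: stage the SDMA, gfx, MES and TOC
 * firmware into the autoload buffer, point the IMU RLC bootloader at the
 * RLC_G image (as a VRAM-relative offset), then either let the IMU
 * load/setup/start the RLC or, when no IMU/dpm path is available, unhalt
 * the RLC GPM threads and enable the F32 core directly.
 */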
1232 static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1233 {
1234         uint32_t rlc_g_offset, rlc_g_size;
1235         uint64_t gpu_addr;
1236         uint32_t data;
1237
1238         /* RLC autoload sequence 2: copy ucode */
1239         gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
1240         gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
1241         gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
1242         gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
1243
1244         rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
1245         rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
1246         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
1247
1248         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1249         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1250
1251         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1252
1253         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
1254                 /* RLC autoload sequence 3: load IMU fw */
1255                 if (adev->gfx.imu.funcs->load_microcode)
1256                         adev->gfx.imu.funcs->load_microcode(adev);
1257         /* RLC autoload sequence 4: init IMU fw */
1258                 if (adev->gfx.imu.funcs->setup_imu)
1259                         adev->gfx.imu.funcs->setup_imu(adev);
1260                 if (adev->gfx.imu.funcs->start_imu)
1261                         adev->gfx.imu.funcs->start_imu(adev);
1262
1263                 /* RLC autoload sequence 5: disable gpa mode */
1264                 gfx_v12_0_disable_gpa_mode(adev);
1265         } else {
1266                 /* unhalt rlc to start autoload without imu */
1267                 data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1268                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
1269                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1270                 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
1271                 WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
1272         }
1273
1274         return 0;
1275 }
1276
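/*
 * Allocate the buffers used to snapshot GC registers for the GFX IP dump:
 * one array for the core register list and one per-instance array each
 * for the compute and gfx queue register lists.  Allocation failure is
 * not fatal; the corresponding dump pointer is simply left NULL.
 */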
1277 static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
1278 {
1279         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
1280         uint32_t *ptr;
1281         uint32_t inst;
1282
1283         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1284         if (!ptr) {
1285                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1286                 adev->gfx.ip_dump_core = NULL;
1287         } else {
1288                 adev->gfx.ip_dump_core = ptr;
1289         }
1290
1291         /* Allocate memory for compute queue registers for all the instances */
1292         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
1293         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1294                 adev->gfx.mec.num_queue_per_pipe;
1295
1296         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1297         if (!ptr) {
1298                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1299                 adev->gfx.ip_dump_compute_queues = NULL;
1300         } else {
1301                 adev->gfx.ip_dump_compute_queues = ptr;
1302         }
1303
1304         /* Allocate memory for gfx queue registers for all the instances */
1305         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
1306         inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1307                 adev->gfx.me.num_queue_per_pipe;
1308
1309         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1310         if (!ptr) {
1311                 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1312                 adev->gfx.ip_dump_gfx_queues = NULL;
1313         } else {
1314                 adev->gfx.ip_dump_gfx_queues = ptr;
1315         }
1316 }
1317
1318 static int gfx_v12_0_sw_init(void *handle)
1319 {
1320         int i, j, k, r, ring_id = 0;
1321         unsigned num_compute_rings;
1322         int xcc_id = 0;
1323         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1324
1325         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1326         case IP_VERSION(12, 0, 0):
1327         case IP_VERSION(12, 0, 1):
1328                 adev->gfx.me.num_me = 1;
1329                 adev->gfx.me.num_pipe_per_me = 1;
1330                 adev->gfx.me.num_queue_per_pipe = 1;
1331                 adev->gfx.mec.num_mec = 2;
1332                 adev->gfx.mec.num_pipe_per_mec = 2;
1333                 adev->gfx.mec.num_queue_per_pipe = 4;
1334                 break;
1335         default:
1336                 adev->gfx.me.num_me = 1;
1337                 adev->gfx.me.num_pipe_per_me = 1;
1338                 adev->gfx.me.num_queue_per_pipe = 1;
1339                 adev->gfx.mec.num_mec = 1;
1340                 adev->gfx.mec.num_pipe_per_mec = 4;
1341                 adev->gfx.mec.num_queue_per_pipe = 8;
1342                 break;
1343         }
1344
1345         /* recalculate compute rings to use based on hardware configuration */
1346         num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
1347                              adev->gfx.mec.num_queue_per_pipe) / 2;
1348         adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
1349                                           num_compute_rings);
1350
1351         /* EOP Event */
1352         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1353                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1354                               &adev->gfx.eop_irq);
1355         if (r)
1356                 return r;
1357
1358         /* Bad opcode Event */
1359         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1360                               GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1361                               &adev->gfx.bad_op_irq);
1362         if (r)
1363                 return r;
1364
1365         /* Privileged reg */
1366         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1367                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1368                               &adev->gfx.priv_reg_irq);
1369         if (r)
1370                 return r;
1371
1372         /* Privileged inst */
1373         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1374                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1375                               &adev->gfx.priv_inst_irq);
1376         if (r)
1377                 return r;
1378
1379         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1380
1381         gfx_v12_0_me_init(adev);
1382
1383         r = gfx_v12_0_rlc_init(adev);
1384         if (r) {
1385                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
1386                 return r;
1387         }
1388
1389         r = gfx_v12_0_mec_init(adev);
1390         if (r) {
1391                 dev_err(adev->dev, "Failed to init MEC BOs!\n");
1392                 return r;
1393         }
1394
1395         /* set up the gfx ring */
1396         for (i = 0; i < adev->gfx.me.num_me; i++) {
1397                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1398                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1399                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1400                                         continue;
1401
1402                                 r = gfx_v12_0_gfx_ring_init(adev, ring_id,
1403                                                             i, k, j);
1404                                 if (r)
1405                                         return r;
1406                                 ring_id++;
1407                         }
1408                 }
1409         }
1410
1411         ring_id = 0;
1412         /* set up the compute queues - allocate horizontally across pipes */
1413         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1414                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1415                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1416                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev,
1417                                                                 0, i, k, j))
1418                                         continue;
1419
1420                                 r = gfx_v12_0_compute_ring_init(adev, ring_id,
1421                                                                 i, k, j);
1422                                 if (r)
1423                                         return r;
1424
1425                                 ring_id++;
1426                         }
1427                 }
1428         }
1429
1430         if (!adev->enable_mes_kiq) {
1431                 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
1432                 if (r) {
1433                         dev_err(adev->dev, "Failed to init KIQ BOs!\n");
1434                         return r;
1435                 }
1436
1437                 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1438                 if (r)
1439                         return r;
1440         }
1441
1442         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
1443         if (r)
1444                 return r;
1445
1446         /* allocate visible FB for rlc auto-loading fw */
1447         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1448                 r = gfx_v12_0_rlc_autoload_buffer_init(adev);
1449                 if (r)
1450                         return r;
1451         }
1452
1453         r = gfx_v12_0_gpu_early_init(adev);
1454         if (r)
1455                 return r;
1456
1457         gfx_v12_0_alloc_ip_dump(adev);
1458
1459         return 0;
1460 }
1461
1462 static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
1463 {
1464         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1465                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1466                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1467
1468         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1469                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1470                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1471 }
1472
1473 static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
1474 {
1475         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1476                               &adev->gfx.me.me_fw_gpu_addr,
1477                               (void **)&adev->gfx.me.me_fw_ptr);
1478
1479         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1480                                &adev->gfx.me.me_fw_data_gpu_addr,
1481                                (void **)&adev->gfx.me.me_fw_data_ptr);
1482 }
1483
1484 static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1485 {
1486         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1487                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1488                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1489 }
1490
1491 static int gfx_v12_0_sw_fini(void *handle)
1492 {
1493         int i;
1494         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1495
1496         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1497                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1498         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1499                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1500
1501         amdgpu_gfx_mqd_sw_fini(adev, 0);
1502
1503         if (!adev->enable_mes_kiq) {
1504                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1505                 amdgpu_gfx_kiq_fini(adev, 0);
1506         }
1507
1508         gfx_v12_0_pfp_fini(adev);
1509         gfx_v12_0_me_fini(adev);
1510         gfx_v12_0_rlc_fini(adev);
1511         gfx_v12_0_mec_fini(adev);
1512
1513         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1514                 gfx_v12_0_rlc_autoload_buffer_fini(adev);
1515
1516         gfx_v12_0_free_microcode(adev);
1517
1518         kfree(adev->gfx.ip_dump_core);
1519         kfree(adev->gfx.ip_dump_compute_queues);
1520         kfree(adev->gfx.ip_dump_gfx_queues);
1521
1522         return 0;
1523 }
1524
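/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target a
 * specific shader engine / shader array / instance, or broadcast to all
 * of them when the corresponding argument is 0xffffffff.
 */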
1525 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1526                                    u32 sh_num, u32 instance, int xcc_id)
1527 {
1528         u32 data;
1529
1530         if (instance == 0xffffffff)
1531                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1532                                      INSTANCE_BROADCAST_WRITES, 1);
1533         else
1534                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1535                                      instance);
1536
1537         if (se_num == 0xffffffff)
1538                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1539                                      1);
1540         else
1541                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1542
1543         if (sh_num == 0xffffffff)
1544                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1545                                      1);
1546         else
1547                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1548
1549         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1550 }
1551
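/*
 * Build the active shader-array bitmap: start from a full mask sized for
 * max_sh_per_se * max_shader_engines and clear every SA disabled in
 * hardware (GRBM_CC_GC_SA_UNIT_DISABLE) or by the user setting
 * (GRBM_GC_USER_SA_UNIT_DISABLE).
 */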
1552 static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1553 {
1554         u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1555
1556         gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
1557         gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1558                                             GRBM_CC_GC_SA_UNIT_DISABLE,
1559                                             SA_DISABLE);
1560         gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
1561         gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1562                                                  GRBM_GC_USER_SA_UNIT_DISABLE,
1563                                                  SA_DISABLE);
1564         sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1565                                             adev->gfx.config.max_shader_engines);
1566
1567         return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1568 }
1569
1570 static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1571 {
1572         u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1573         u32 rb_mask;
1574
1575         gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1576         gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1577                                             CC_RB_BACKEND_DISABLE,
1578                                             BACKEND_DISABLE);
1579         gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1580         gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1581                                                  GC_USER_RB_BACKEND_DISABLE,
1582                                                  BACKEND_DISABLE);
1583         rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1584                                             adev->gfx.config.max_shader_engines);
1585
1586         return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1587 }
1588
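/*
 * Derive the enabled render-backend mask: each active SA contributes a
 * 0x3 RB mask at its position (stepped by rb_bitmap_width_per_sa), the
 * globally active RB bitmap is ORed in, and the result plus the RB count
 * is recorded in the gfx config.
 */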
1589 static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
1590 {
1591         u32 rb_bitmap_width_per_sa;
1592         u32 max_sa;
1593         u32 active_sa_bitmap;
1594         u32 global_active_rb_bitmap;
1595         u32 active_rb_bitmap = 0;
1596         u32 i;
1597
1598         /* query sa bitmap from SA_UNIT_DISABLE registers */
1599         active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
1600         /* query rb bitmap from RB_BACKEND_DISABLE registers */
1601         global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
1602
1603         /* generate active rb bitmap according to active sa bitmap */
1604         max_sa = adev->gfx.config.max_shader_engines *
1605                  adev->gfx.config.max_sh_per_se;
1606         rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1607                                  adev->gfx.config.max_sh_per_se;
1608         for (i = 0; i < max_sa; i++) {
1609                 if (active_sa_bitmap & (1 << i))
1610                         active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1611         }
1612
1613         active_rb_bitmap |= global_active_rb_bitmap;
1614         adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1615         adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1616 }
1617
1618 #define LDS_APP_BASE           0x1
1619 #define SCRATCH_APP_BASE       0x2
1620
1621 static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
1622 {
1623         int i;
1624         uint32_t sh_mem_bases;
1625         uint32_t data;
1626
1627         /*
1628          * Configure apertures:
1629          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1630          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1631          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1632          */
1633         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1634                         SCRATCH_APP_BASE;
1635
1636         mutex_lock(&adev->srbm_mutex);
1637         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1638                 soc24_grbm_select(adev, 0, 0, 0, i);
1639                 /* CP and shaders */
1640                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1641                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1642
1643                 /* Enable trap for each kfd vmid. */
1644                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1645                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1646                 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1647         }
1648         soc24_grbm_select(adev, 0, 0, 0, 0);
1649         mutex_unlock(&adev->srbm_mutex);
1650 }
1651
1652 static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
1653 {
1654         /* TODO: harvest feature to be added later. */
1655 }
1656
1657 static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
1658 {
1659 }
1660
1661 static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
1662 {
1663         u32 tmp;
1664         int i;
1665
1666         if (!amdgpu_sriov_vf(adev))
1667                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1668
1669         gfx_v12_0_setup_rb(adev);
1670         gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
1671         gfx_v12_0_get_tcc_info(adev);
1672         adev->gfx.config.pa_sc_tile_steering_override = 0;
1673
1674         /* XXX SH_MEM regs */
1675         /* where to put LDS, scratch, GPUVM in FSA64 space */
1676         mutex_lock(&adev->srbm_mutex);
1677         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1678                 soc24_grbm_select(adev, 0, 0, 0, i);
1679                 /* CP and shaders */
1680                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1681                 if (i != 0) {
1682                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1683                                 (adev->gmc.private_aperture_start >> 48));
1684                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1685                                 (adev->gmc.shared_aperture_start >> 48));
1686                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1687                 }
1688         }
1689         soc24_grbm_select(adev, 0, 0, 0, 0);
1690
1691         mutex_unlock(&adev->srbm_mutex);
1692
1693         gfx_v12_0_init_compute_vmid(adev);
1694 }
1695
1696 static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
1697                                       int me, int pipe)
1698 {
1699         if (me != 0)
1700                 return 0;
1701
1702         switch (pipe) {
1703         case 0:
1704                 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
1705         default:
1706                 return 0;
1707         }
1708 }
1709
1710 static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
1711                                       int me, int pipe)
1712 {
1713         /*
1714          * amdgpu controls only the first MEC. That's why this function only
1715          * handles the setting of interrupts for this specific MEC. All other
1716          * pipes' interrupts are set by amdkfd.
1717          */
1718         if (me != 1)
1719                 return 0;
1720
1721         switch (pipe) {
1722         case 0:
1723                 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
1724         case 1:
1725                 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
1726         default:
1727                 return 0;
1728         }
1729 }
1730
1731 static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1732                                                bool enable)
1733 {
1734         u32 tmp, cp_int_cntl_reg;
1735         int i, j;
1736
1737         if (amdgpu_sriov_vf(adev))
1738                 return;
1739
1740         for (i = 0; i < adev->gfx.me.num_me; i++) {
1741                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
1742                         cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
1743
1744                         if (cp_int_cntl_reg) {
1745                                 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
1746                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1747                                                     enable ? 1 : 0);
1748                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1749                                                     enable ? 1 : 0);
1750                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1751                                                     enable ? 1 : 0);
1752                                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1753                                                     enable ? 1 : 0);
1754                                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
1755                         }
1756                 }
1757         }
1758 }
1759
1760 static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
1761 {
1762         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1763
1764         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1765                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1766         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1767                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1768         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1769
1770         return 0;
1771 }
1772
1773 static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
1774 {
1775         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1776
1777         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1778         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1779 }
1780
1781 static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
1782 {
1783         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1784         udelay(50);
1785         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1786         udelay(50);
1787 }
1788
1789 static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1790                                              bool enable)
1791 {
1792         uint32_t rlc_pg_cntl;
1793
1794         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1795
1796         if (!enable) {
1797                 /* RLC_PG_CNTL[23] = 0 (default)
1798                  * RLC will wait for handshake acks with SMU
1799                  * GFXOFF will be enabled
1800                  * RLC_PG_CNTL[23] = 1
1801                  * RLC will not issue any message to SMU
1802                  * hence no handshake between SMU & RLC
1803                  * GFXOFF will be disabled
1804                  */
1805                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1806         } else
1807                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1808         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1809 }
1810
1811 static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
1812 {
1813         /* TODO: enable rlc & smu handshake until smu
1814          * and gfxoff features work as expected */
1815         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1816                 gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
1817
1818         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1819         udelay(50);
1820 }
1821
1822 static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
1823 {
1824         uint32_t tmp;
1825
1826         /* enable Save Restore Machine */
1827         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1828         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1829         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1830         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1831 }
1832
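/*
 * Legacy (direct) RLCG load: stream the v2_0 ucode words into
 * RLC_GPM_UCODE_DATA starting at RLCG_UCODE_LOADING_START_ADDRESS, then
 * leave the firmware version in RLC_GPM_UCODE_ADDR.
 */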
1833 static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
1834 {
1835         const struct rlc_firmware_header_v2_0 *hdr;
1836         const __le32 *fw_data;
1837         unsigned i, fw_size;
1838
1839         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1840         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1841                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1842         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1843
1844         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1845                      RLCG_UCODE_LOADING_START_ADDRESS);
1846
1847         for (i = 0; i < fw_size; i++)
1848                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1849                              le32_to_cpup(fw_data++));
1850
1851         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1852 }
1853
1854 static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1855 {
1856         const struct rlc_firmware_header_v2_2 *hdr;
1857         const __le32 *fw_data;
1858         unsigned i, fw_size;
1859         u32 tmp;
1860
1861         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1862
1863         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1864                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1865         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1866
1867         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1868
1869         for (i = 0; i < fw_size; i++) {
1870                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1871                         msleep(1);
1872                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1873                                 le32_to_cpup(fw_data++));
1874         }
1875
1876         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1877
1878         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1879                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1880         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1881
1882         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1883         for (i = 0; i < fw_size; i++) {
1884                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1885                         msleep(1);
1886                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1887                                 le32_to_cpup(fw_data++));
1888         }
1889
1890         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1891
1892         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1893         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1894         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1895         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1896 }
1897
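/*
 * Direct RLC firmware load.  Only v2.x headers are supported: the RLCG
 * image is always loaded, and the LX6 IRAM/DRAM images are loaded as well
 * when amdgpu_dpm == 1 and the header minor version is >= 2.
 */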
1898 static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
1899 {
1900         const struct rlc_firmware_header_v2_0 *hdr;
1901         uint16_t version_major;
1902         uint16_t version_minor;
1903
1904         if (!adev->gfx.rlc_fw)
1905                 return -EINVAL;
1906
1907         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1908         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1909
1910         version_major = le16_to_cpu(hdr->header.header_version_major);
1911         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1912
1913         if (version_major == 2) {
1914                 gfx_v12_0_load_rlcg_microcode(adev);
1915                 if (amdgpu_dpm == 1) {
1916                         if (version_minor >= 2)
1917                                 gfx_v12_0_load_rlc_iram_dram_microcode(adev);
1918                 }
1919
1920                 return 0;
1921         }
1922
1923         return -EINVAL;
1924 }
1925
1926 static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
1927 {
1928         int r;
1929
1930         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1931                 gfx_v12_0_init_csb(adev);
1932
1933                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1934                         gfx_v12_0_rlc_enable_srm(adev);
1935         } else {
1936                 if (amdgpu_sriov_vf(adev)) {
1937                         gfx_v12_0_init_csb(adev);
1938                         return 0;
1939                 }
1940
1941                 adev->gfx.rlc.funcs->stop(adev);
1942
1943                 /* disable CG */
1944                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1945
1946                 /* disable PG */
1947                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1948
1949                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1950                         /* legacy rlc firmware loading */
1951                         r = gfx_v12_0_rlc_load_microcode(adev);
1952                         if (r)
1953                                 return r;
1954                 }
1955
1956                 gfx_v12_0_init_csb(adev);
1957
1958                 adev->gfx.rlc.funcs->start(adev);
1959         }
1960
1961         return 0;
1962 }
1963
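/*
 * Program the RS64 program-counter start addresses for both PFP and ME
 * pipes and all four MEC pipes, then pulse the corresponding pipe reset
 * bits so the new start addresses take effect.
 */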
1964 static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
1965 {
1966         const struct gfx_firmware_header_v2_0 *pfp_hdr;
1967         const struct gfx_firmware_header_v2_0 *me_hdr;
1968         const struct gfx_firmware_header_v2_0 *mec_hdr;
1969         uint32_t pipe_id, tmp;
1970
1971         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
1972                 adev->gfx.mec_fw->data;
1973         me_hdr = (const struct gfx_firmware_header_v2_0 *)
1974                 adev->gfx.me_fw->data;
1975         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
1976                 adev->gfx.pfp_fw->data;
1977
1978         /* config pfp program start addr */
1979         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
1980                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
1981                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
1982                         (pfp_hdr->ucode_start_addr_hi << 30) |
1983                         (pfp_hdr->ucode_start_addr_lo >> 2));
1984                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
1985                         pfp_hdr->ucode_start_addr_hi >> 2);
1986         }
1987         soc24_grbm_select(adev, 0, 0, 0, 0);
1988
1989         /* reset pfp pipe */
1990         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
1991         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
1992         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
1993         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1994
1995         /* clear pfp pipe reset */
1996         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
1997         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
1998         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1999
2000         /* config me program start addr */
2001         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2002                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2003                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2004                         (me_hdr->ucode_start_addr_hi << 30) |
2005                         (me_hdr->ucode_start_addr_lo >> 2));
2006                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2007                         me_hdr->ucode_start_addr_hi >> 2);
2008         }
2009         soc24_grbm_select(adev, 0, 0, 0, 0);
2010
2011         /* reset me pipe */
2012         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2013         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2014         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2015         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2016
2017         /* clear me pipe reset */
2018         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2019         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2020         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2021
2022         /* config mec program start addr */
2023         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2024                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
2025                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2026                                         mec_hdr->ucode_start_addr_lo >> 2 |
2027                                         mec_hdr->ucode_start_addr_hi << 30);
2028                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2029                                         mec_hdr->ucode_start_addr_hi >> 2);
2030         }
2031         soc24_grbm_select(adev, 0, 0, 0, 0);
2032
2033         /* reset mec pipe */
2034         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2035         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2036         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2037         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2038         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2039         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2040
2041         /* clear mec pipe reset */
2042         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2043         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2044         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2045         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2046         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2047 }
2048
2049 static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
2050 {
2051         const struct gfx_firmware_header_v2_0 *cp_hdr;
2052         unsigned pipe_id, tmp;
2053
2054         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2055                 adev->gfx.pfp_fw->data;
2056         mutex_lock(&adev->srbm_mutex);
2057         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2058                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2059                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2060                              (cp_hdr->ucode_start_addr_hi << 30) |
2061                              (cp_hdr->ucode_start_addr_lo >> 2));
2062                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2063                              cp_hdr->ucode_start_addr_hi >> 2);
2064
2065                 /*
2066                  * Program CP_ME_CNTL to reset the given pipe so that
2067                  * CP_PFP_PRGRM_CNTR_START takes effect.
2068                  */
2069                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2070                 if (pipe_id == 0)
2071                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2072                                         PFP_PIPE0_RESET, 1);
2073                 else
2074                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2075                                         PFP_PIPE1_RESET, 1);
2076                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2077
2078                 /* Clear pfp pipe reset bit. */
2079                 if (pipe_id == 0)
2080                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2081                                         PFP_PIPE0_RESET, 0);
2082                 else
2083                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2084                                         PFP_PIPE1_RESET, 0);
2085                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2086         }
2087         soc24_grbm_select(adev, 0, 0, 0, 0);
2088         mutex_unlock(&adev->srbm_mutex);
2089 }
2090
2091 static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
2092 {
2093         const struct gfx_firmware_header_v2_0 *cp_hdr;
2094         unsigned pipe_id, tmp;
2095
2096         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2097                 adev->gfx.me_fw->data;
2098         mutex_lock(&adev->srbm_mutex);
2099         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2100                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2101                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2102                              (cp_hdr->ucode_start_addr_hi << 30) |
2103                              (cp_hdr->ucode_start_addr_lo >> 2));
2104                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2105                              cp_hdr->ucode_start_addr_hi >> 2);
2106
2107                 /*
2108                  * Program CP_ME_CNTL to reset the given pipe so that
2109                  * CP_ME_PRGRM_CNTR_START takes effect.
2110                  */
2111                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2112                 if (pipe_id == 0)
2113                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2114                                         ME_PIPE0_RESET, 1);
2115                 else
2116                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2117                                         ME_PIPE1_RESET, 1);
2118                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2119
2120                 /* Clear me pipe reset bit. */
2121                 if (pipe_id == 0)
2122                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2123                                         ME_PIPE0_RESET, 0);
2124                 else
2125                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2126                                         ME_PIPE1_RESET, 0);
2127                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2128         }
2129         soc24_grbm_select(adev, 0, 0, 0, 0);
2130         mutex_unlock(&adev->srbm_mutex);
2131 }
2132
2133 static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
2134 {
2135         const struct gfx_firmware_header_v2_0 *cp_hdr;
2136         unsigned pipe_id;
2137
2138         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2139                 adev->gfx.mec_fw->data;
2140         mutex_lock(&adev->srbm_mutex);
2141         for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
2142                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
2143                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2144                              cp_hdr->ucode_start_addr_lo >> 2 |
2145                              cp_hdr->ucode_start_addr_hi << 30);
2146                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2147                              cp_hdr->ucode_start_addr_hi >> 2);
2148         }
2149         soc24_grbm_select(adev, 0, 0, 0, 0);
2150         mutex_unlock(&adev->srbm_mutex);
2151 }
2152
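/*
 * Wait for the RLC bootloader to finish the backdoor autoload: poll until
 * CP_STAT reads zero and RLC_RLCS_BOOTLOAD_STATUS reports
 * BOOTLOAD_COMPLETE, or time out after the device usec timeout.  On the
 * backdoor-autoload path the PFP/ME/MEC start addresses are programmed
 * once the bootload has completed.
 */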
2153 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2154 {
2155         uint32_t cp_status;
2156         uint32_t bootload_status;
2157         int i;
2158
2159         for (i = 0; i < adev->usec_timeout; i++) {
2160                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2161                 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2162
2163                 if ((cp_status == 0) &&
2164                     (REG_GET_FIELD(bootload_status,
2165                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2166                         break;
2167                 }
2168                 udelay(1);
2169                 if (amdgpu_emu_mode)
2170                         msleep(10);
2171         }
2172
2173         if (i >= adev->usec_timeout) {
2174                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2175                 return -ETIMEDOUT;
2176         }
2177
2178         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2179                 gfx_v12_0_set_pfp_ucode_start_addr(adev);
2180                 gfx_v12_0_set_me_ucode_start_addr(adev);
2181                 gfx_v12_0_set_mec_ucode_start_addr(adev);
2182         }
2183
2184         return 0;
2185 }
2186
2187 static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2188 {
2189         int i;
2190         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2191
2192         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2193         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2194         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2195
2196         for (i = 0; i < adev->usec_timeout; i++) {
2197                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2198                         break;
2199                 udelay(1);
2200         }
2201
2202         if (i >= adev->usec_timeout)
2203                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2204
2205         return 0;
2206 }
2207
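/*
 * Direct load of the RS64 PFP firmware: copy the instruction and data
 * images into 64KB-aligned VRAM BOs, program the instruction- and
 * data-cache base registers, wait for the I$ invalidation triggered by
 * that programming, prime the I$, invalidate the RS64 D$, and finally
 * program the PFP start address.
 */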
2208 static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2209 {
2210         int r;
2211         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2212         const __le32 *fw_ucode, *fw_data;
2213         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2214         uint32_t tmp;
2215         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2216
2217         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2218                 adev->gfx.pfp_fw->data;
2219
2220         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2221
2222         /* instruction */
2223         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2224                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2225         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2226         /* data */
2227         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2228                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2229         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2230
2231         /* 64kb align */
2232         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2233                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2234                                       &adev->gfx.pfp.pfp_fw_obj,
2235                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2236                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2237         if (r) {
2238                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2239                 gfx_v12_0_pfp_fini(adev);
2240                 return r;
2241         }
2242
2243         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2244                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2245                                       &adev->gfx.pfp.pfp_fw_data_obj,
2246                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2247                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2248         if (r) {
2249                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2250                 gfx_v12_0_pfp_fini(adev);
2251                 return r;
2252         }
2253
2254         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2255         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2256
2257         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2258         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2259         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2260         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2261
2262         if (amdgpu_emu_mode == 1)
2263                 adev->hdp.funcs->flush_hdp(adev, NULL);
2264
2265         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2266                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2267         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2268                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2269
2270         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2271         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2272         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2273         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2274         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2275
2276         /*
2277          * Programming any of the CP_PFP_IC_BASE registers
2278          * forces invalidation of the ME L1 I$. Wait for the
2279          * invalidation to complete
2280          */
2281         for (i = 0; i < usec_timeout; i++) {
2282                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2283                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2284                         INVALIDATE_CACHE_COMPLETE))
2285                         break;
2286                 udelay(1);
2287         }
2288
2289         if (i >= usec_timeout) {
2290                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2291                 return -EINVAL;
2292         }
2293
2294         /* Prime the L1 instruction caches */
2295         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2296         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2297         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2298         /* Wait for the cache to be primed */
2299         for (i = 0; i < usec_timeout; i++) {
2300                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2301                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2302                         ICACHE_PRIMED))
2303                         break;
2304                 udelay(1);
2305         }
2306
2307         if (i >= usec_timeout) {
2308                 dev_err(adev->dev, "failed to prime instruction cache\n");
2309                 return -EINVAL;
2310         }
2311
2312         mutex_lock(&adev->srbm_mutex);
2313         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2314                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2315
2316                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2317                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2318                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2319                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2320         }
2321         soc24_grbm_select(adev, 0, 0, 0, 0);
2322         mutex_unlock(&adev->srbm_mutex);
2323
2324         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2325         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2326         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2327         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2328
2329         /* Invalidate the data caches */
2330         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2331         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2332         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2333
2334         for (i = 0; i < usec_timeout; i++) {
2335                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2336                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2337                         INVALIDATE_DCACHE_COMPLETE))
2338                         break;
2339                 udelay(1);
2340         }
2341
2342         if (i >= usec_timeout) {
2343                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2344                 return -EINVAL;
2345         }
2346
2347         gfx_v12_0_set_pfp_ucode_start_addr(adev);
2348
2349         return 0;
2350 }
2351
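/* Direct load of the RS64 ME firmware; same sequence as the PFP loader above. */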
2352 static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2353 {
2354         int r;
2355         const struct gfx_firmware_header_v2_0 *me_hdr;
2356         const __le32 *fw_ucode, *fw_data;
2357         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2358         uint32_t tmp;
2359         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2360
2361         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2362                 adev->gfx.me_fw->data;
2363
2364         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2365
2366         /* instruction */
2367         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2368                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2369         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2370         /* data */
2371         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2372                 le32_to_cpu(me_hdr->data_offset_bytes));
2373         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2374
2375         /* 64kb align */
2376         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2377                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2378                                       &adev->gfx.me.me_fw_obj,
2379                                       &adev->gfx.me.me_fw_gpu_addr,
2380                                       (void **)&adev->gfx.me.me_fw_ptr);
2381         if (r) {
2382                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2383                 gfx_v12_0_me_fini(adev);
2384                 return r;
2385         }
2386
2387         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2388                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2389                                       &adev->gfx.me.me_fw_data_obj,
2390                                       &adev->gfx.me.me_fw_data_gpu_addr,
2391                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2392         if (r) {
2393                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2394                 gfx_v12_0_pfp_fini(adev);
2395                 return r;
2396         }
2397
2398         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2399         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2400
2401         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2402         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2403         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2404         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2405
2406         if (amdgpu_emu_mode == 1)
2407                 adev->hdp.funcs->flush_hdp(adev, NULL);
2408
2409         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2410                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2411         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2412                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2413
2414         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2415         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2416         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2417         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2418         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2419
2420         /*
2421          * Programming any of the CP_ME_IC_BASE registers
2422          * forces invalidation of the ME L1 I$. Wait for the
2423          * invalidation to complete
2424          */
2425         for (i = 0; i < usec_timeout; i++) {
2426                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2427                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2428                         INVALIDATE_CACHE_COMPLETE))
2429                         break;
2430                 udelay(1);
2431         }
2432
2433         if (i >= usec_timeout) {
2434                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2435                 return -EINVAL;
2436         }
2437
2438         /* Prime the instruction caches */
2439         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2440         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2441         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2442
2443         /* Wait for the instruction cache to be primed */
2444         for (i = 0; i < usec_timeout; i++) {
2445                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2446                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2447                         ICACHE_PRIMED))
2448                         break;
2449                 udelay(1);
2450         }
2451
2452         if (i >= usec_timeout) {
2453                 dev_err(adev->dev, "failed to prime instruction cache\n");
2454                 return -EINVAL;
2455         }
2456
2457         mutex_lock(&adev->srbm_mutex);
2458         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2459                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2460
2461                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2462                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2463                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2464                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2465         }
2466         soc24_grbm_select(adev, 0, 0, 0, 0);
2467         mutex_unlock(&adev->srbm_mutex);
2468
2469         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2470         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2471         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2472         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2473
2474         /* Invalidate the data caches */
2475         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2476         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2477         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2478
2479         for (i = 0; i < usec_timeout; i++) {
2480                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2481                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2482                         INVALIDATE_DCACHE_COMPLETE))
2483                         break;
2484                 udelay(1);
2485         }
2486
2487         if (i >= usec_timeout) {
2488                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2489                 return -EINVAL;
2490         }
2491
2492         gfx_v12_0_set_me_ucode_start_addr(adev);
2493
2494         return 0;
2495 }
2496
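/*
 * Direct (non-PSP) loading of the RS64 gfx microcode: the CP gfx engine is
 * halted first, then the PFP and ME images are staged through the
 * *_load_pfp/me_microcode_rs64() helpers above.
 */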
2497 static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2498 {
2499         int r;
2500
2501         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
2502                 return -EINVAL;
2503
2504         gfx_v12_0_cp_gfx_enable(adev, false);
2505
2506         r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
2507         if (r) {
2508                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
2509                 return r;
2510         }
2511
2512         r = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
2513         if (r) {
2514                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
2515                 return r;
2516         }
2517
2518         return 0;
2519 }
2520
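/*
 * Minimal CP gfx init: program the number of hardware contexts and the
 * device id. With async gfx rings (the default) the engine enable is
 * handled from gfx_v12_0_cp_resume() instead of here.
 */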
2521 static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
2522 {
2523         /* init the CP */
2524         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
2525                      adev->gfx.config.max_hw_contexts - 1);
2526         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
2527
2528         if (!amdgpu_async_gfx_ring)
2529                 gfx_v12_0_cp_gfx_enable(adev, true);
2530
2531         return 0;
2532 }
2533
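/*
 * GRBM_GFX_CNTL.PIPEID selects which gfx pipe the CP ring buffer registers
 * programmed afterwards refer to; callers hold srbm_mutex around the switch
 * and the register writes that follow.
 */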
2534 static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
2535                                          CP_PIPE_ID pipe)
2536 {
2537         u32 tmp;
2538
2539         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
2540         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
2541
2542         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
2543 }
2544
2545 static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
2546                                           struct amdgpu_ring *ring)
2547 {
2548         u32 tmp;
2549
2550         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2551         if (ring->use_doorbell) {
2552                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2553                                     DOORBELL_OFFSET, ring->doorbell_index);
2554                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2555                                     DOORBELL_EN, 1);
2556         } else {
2557                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2558                                     DOORBELL_EN, 0);
2559         }
2560         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
2561
2562         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2563                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
2564         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
2565
2566         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2567                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2568 }
2569
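/*
 * Bring up gfx ring 0 on pipe 0: program the ring buffer size, reset the
 * write pointer, set the rptr write-back and wptr poll addresses, program
 * the ring base and doorbell, then start the CP and mark the gfx rings as
 * schedulable.
 */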
2570 static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
2571 {
2572         struct amdgpu_ring *ring;
2573         u32 tmp;
2574         u32 rb_bufsz;
2575         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2576         u32 i;
2577
2578         /* Set the write pointer delay */
2579         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
2580
2581         /* set the RB to use vmid 0 */
2582         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
2583
2584         /* Init gfx ring 0 for pipe 0 */
2585         mutex_lock(&adev->srbm_mutex);
2586         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2587
2588         /* Set ring buffer size */
2589         ring = &adev->gfx.gfx_ring[0];
2590         rb_bufsz = order_base_2(ring->ring_size / 8);
2591         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2592         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2593         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2594
2595         /* Initialize the ring buffer's write pointers */
2596         ring->wptr = 0;
2597         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
2598         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2599
2600         /* set the wb address whether it's enabled or not */
2601         rptr_addr = ring->rptr_gpu_addr;
2602         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2603         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
2604                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2605
2606         wptr_gpu_addr = ring->wptr_gpu_addr;
2607         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
2608                      lower_32_bits(wptr_gpu_addr));
2609         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
2610                      upper_32_bits(wptr_gpu_addr));
2611
2612         mdelay(1);
2613         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2614
2615         rb_addr = ring->gpu_addr >> 8;
2616         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
2617         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2618
2619         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
2620
2621         gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
2622         mutex_unlock(&adev->srbm_mutex);
2623
2624         /* Switch to pipe 0 */
2625         mutex_lock(&adev->srbm_mutex);
2626         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2627         mutex_unlock(&adev->srbm_mutex);
2628
2629         /* start the ring */
2630         gfx_v12_0_cp_gfx_start(adev);
2631
2632         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2633                 ring = &adev->gfx.gfx_ring[i];
2634                 ring->sched.ready = true;
2635         }
2636
2637         return 0;
2638 }
2639
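/*
 * Enable or disable the RS64 MEC: enabling clears the per-pipe reset and
 * icache-invalidate bits and marks all four pipes active, disabling resets
 * the pipes and halts the MEC. KIQ ring readiness tracks this state.
 */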
2640 static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2641 {
2642         u32 data;
2643
2644         data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2645         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
2646                                                  enable ? 0 : 1);
2647         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
2648                                                  enable ? 0 : 1);
2649         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
2650                                                  enable ? 0 : 1);
2651         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
2652                                                  enable ? 0 : 1);
2653         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
2654                                                  enable ? 0 : 1);
2655         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
2656                                                  enable ? 1 : 0);
2657         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
2658                                                  enable ? 1 : 0);
2659         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
2660                                                  enable ? 1 : 0);
2661         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
2662                                                  enable ? 1 : 0);
2663         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
2664                                                  enable ? 0 : 1);
2665         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
2666
2667         adev->gfx.kiq[0].ring.sched.ready = enable;
2668
2669         udelay(50);
2670 }
2671
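/*
 * Direct loading of the RS64 MEC firmware: the instruction segment is
 * copied into one VRAM BO and the data segment is replicated once per MEC
 * pipe (64K aligned). Each pipe's cache base registers are then pointed at
 * those BOs, and the MEC data and CPC instruction caches are invalidated
 * before the ucode start addresses are programmed.
 */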
2672 static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
2673 {
2674         const struct gfx_firmware_header_v2_0 *mec_hdr;
2675         const __le32 *fw_ucode, *fw_data;
2676         u32 tmp, fw_ucode_size, fw_data_size;
2677         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
2678         u32 *fw_ucode_ptr, *fw_data_ptr;
2679         int r;
2680
2681         if (!adev->gfx.mec_fw)
2682                 return -EINVAL;
2683
2684         gfx_v12_0_cp_compute_enable(adev, false);
2685
2686         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
2687         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2688
2689         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
2690                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
2691         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
2692
2693         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
2694                                 le32_to_cpu(mec_hdr->data_offset_bytes));
2695         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
2696
2697         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2698                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2699                                       &adev->gfx.mec.mec_fw_obj,
2700                                       &adev->gfx.mec.mec_fw_gpu_addr,
2701                                       (void **)&fw_ucode_ptr);
2702         if (r) {
2703                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
2704                 gfx_v12_0_mec_fini(adev);
2705                 return r;
2706         }
2707
2708         r = amdgpu_bo_create_reserved(adev,
2709                                       ALIGN(fw_data_size, 64 * 1024) *
2710                                       adev->gfx.mec.num_pipe_per_mec,
2711                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2712                                       &adev->gfx.mec.mec_fw_data_obj,
2713                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
2714                                       (void **)&fw_data_ptr);
2715         if (r) {
2716                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
2717                 gfx_v12_0_mec_fini(adev);
2718                 return r;
2719         }
2720
2721         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
2722         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2723                 memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
2724         }
2725
2726         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2727         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
2728         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2729         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
2730
2731         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2732         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2733         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2734         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2735         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2736
2737         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2738         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2739         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2740         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2741
2742         mutex_lock(&adev->srbm_mutex);
2743         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2744                 soc24_grbm_select(adev, 1, i, 0, 0);
2745
2746                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
2747                              lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2748                                            i * ALIGN(fw_data_size, 64 * 1024)));
2749                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2750                              upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2751                                            i * ALIGN(fw_data_size, 64 * 1024)));
2752
2753                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2754                              lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2755                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2756                              upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2757         }
2758         soc24_grbm_select(adev, 0, 0, 0, 0);
2759         mutex_unlock(&adev->srbm_mutex);
2760
2761         /* Trigger an invalidation of the MEC L1 data cache */
2762         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2763         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2764         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2765
2766         /* Wait for invalidation complete */
2767         for (i = 0; i < usec_timeout; i++) {
2768                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2769                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2770                                        INVALIDATE_DCACHE_COMPLETE))
2771                         break;
2772                 udelay(1);
2773         }
2774
2775         if (i >= usec_timeout) {
2776                 dev_err(adev->dev, "failed to invalidate MEC data cache\n");
2777                 return -EINVAL;
2778         }
2779
2780         /* Trigger an invalidation of the L1 instruction caches */
2781         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2782         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2783         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2784
2785         /* Wait for invalidation complete */
2786         for (i = 0; i < usec_timeout; i++) {
2787                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2788                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2789                                        INVALIDATE_CACHE_COMPLETE))
2790                         break;
2791                 udelay(1);
2792         }
2793
2794         if (i >= usec_timeout) {
2795                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2796                 return -EINVAL;
2797         }
2798
2799         gfx_v12_0_set_mec_ucode_start_addr(adev);
2800
2801         return 0;
2802 }
2803
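/*
 * RLC_CP_SCHEDULERS tells the RLC which queue is the KIQ. The low byte
 * appears to encode the me/pipe/queue of the KIQ ring, with bit 7 written
 * in a second pass as an enable/valid bit.
 */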
2804 static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
2805 {
2806         uint32_t tmp;
2807         struct amdgpu_device *adev = ring->adev;
2808
2809         /* tell the RLC which queue is the KIQ */
2810         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
2811         tmp &= 0xffffff00;
2812         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2813         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
2814         tmp |= 0x80;
2815         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
2816 }
2817
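/*
 * Program the doorbell apertures for the gfx and compute engines. The
 * doorbell_index values appear to be in 64-bit doorbell units: "* 2"
 * converts them to 32-bit dword units and "<< 2" to the byte offset the
 * range registers expect.
 */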
2818 static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
2819 {
2820         /* set graphics engine doorbell range */
2821         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
2822                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
2823         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2824                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
2825
2826         /* set compute engine doorbell range */
2827         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
2828                      (adev->doorbell_index.kiq * 2) << 2);
2829         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
2830                      (adev->doorbell_index.userqueue_end * 2) << 2);
2831 }
2832
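/*
 * Fill a gfx MQD (memory queue descriptor) from the generic mqd_prop: ring
 * base, rptr/wptr write-back addresses, doorbell setup, priority and
 * quantum. The scheduler consumes this structure when the queue is mapped,
 * rather than the driver programming the gfx HQD registers directly here.
 */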
2833 static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
2834                                   struct amdgpu_mqd_prop *prop)
2835 {
2836         struct v12_gfx_mqd *mqd = m;
2837         uint64_t hqd_gpu_addr, wb_gpu_addr;
2838         uint32_t tmp;
2839         uint32_t rb_bufsz;
2840
2841         /* set up gfx hqd wptr */
2842         mqd->cp_gfx_hqd_wptr = 0;
2843         mqd->cp_gfx_hqd_wptr_hi = 0;
2844
2845         /* set the pointer to the MQD */
2846         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
2847         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2848
2849         /* set up mqd control */
2850         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
2851         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
2852         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
2853         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
2854         mqd->cp_gfx_mqd_control = tmp;
2855
2856         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
2857         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
2858         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
2859         mqd->cp_gfx_hqd_vmid = 0;
2860
2861         /* set up default queue priority level
2862          * 0x0 = low priority, 0x1 = high priority */
2863         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
2864         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
2865         mqd->cp_gfx_hqd_queue_priority = tmp;
2866
2867         /* set up time quantum */
2868         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
2869         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
2870         mqd->cp_gfx_hqd_quantum = tmp;
2871
2872         /* set up gfx hqd base. this is similar to CP_RB_BASE */
2873         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2874         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
2875         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
2876
2877         /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
2878         wb_gpu_addr = prop->rptr_gpu_addr;
2879         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
2880         mqd->cp_gfx_hqd_rptr_addr_hi =
2881                 upper_32_bits(wb_gpu_addr) & 0xffff;
2882
2883         /* set up rb_wptr_poll addr */
2884         wb_gpu_addr = prop->wptr_gpu_addr;
2885         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2886         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2887
2888         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
2889         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
2890         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
2891         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
2892         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
2893 #ifdef __BIG_ENDIAN
2894         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
2895 #endif
2896         mqd->cp_gfx_hqd_cntl = tmp;
2897
2898         /* set up cp_doorbell_control */
2899         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2900         if (prop->use_doorbell) {
2901                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2902                                     DOORBELL_OFFSET, prop->doorbell_index);
2903                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2904                                     DOORBELL_EN, 1);
2905         } else
2906                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2907                                     DOORBELL_EN, 0);
2908         mqd->cp_rb_doorbell_control = tmp;
2909
2910         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2911         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
2912
2913         /* activate the queue */
2914         mqd->cp_gfx_hqd_active = 1;
2915
2916         return 0;
2917 }
2918
2919 static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
2920 {
2921         struct amdgpu_device *adev = ring->adev;
2922         struct v12_gfx_mqd *mqd = ring->mqd_ptr;
2923         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
2924
2925         if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
2926                 memset((void *)mqd, 0, sizeof(*mqd));
2927                 mutex_lock(&adev->srbm_mutex);
2928                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2929                 amdgpu_ring_init_mqd(ring);
2930                 soc24_grbm_select(adev, 0, 0, 0, 0);
2931                 mutex_unlock(&adev->srbm_mutex);
2932                 if (adev->gfx.me.mqd_backup[mqd_idx])
2933                         memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
2934         } else {
2935                 /* restore mqd with the backup copy */
2936                 if (adev->gfx.me.mqd_backup[mqd_idx])
2937                         memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
2938                 /* reset the ring */
2939                 ring->wptr = 0;
2940                 *ring->wptr_cpu_addr = 0;
2941                 amdgpu_ring_clear_ring(ring);
2942         }
2943
2944         return 0;
2945 }
2946
2947 static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
2948 {
2949         int r, i;
2950         struct amdgpu_ring *ring;
2951
2952         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2953                 ring = &adev->gfx.gfx_ring[i];
2954
2955                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2956                 if (unlikely(r != 0))
2957                         goto done;
2958
2959                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2960                 if (!r) {
2961                         r = gfx_v12_0_kgq_init_queue(ring, false);
2962                         amdgpu_bo_kunmap(ring->mqd_obj);
2963                         ring->mqd_ptr = NULL;
2964                 }
2965                 amdgpu_bo_unreserve(ring->mqd_obj);
2966                 if (r)
2967                         goto done;
2968         }
2969
2970         r = amdgpu_gfx_enable_kgq(adev, 0);
2971         if (r)
2972                 goto done;
2973
2974         r = gfx_v12_0_cp_gfx_start(adev);
2975         if (r)
2976                 goto done;
2977
2978         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2979                 ring = &adev->gfx.gfx_ring[i];
2980                 ring->sched.ready = true;
2981         }
2982 done:
2983         return r;
2984 }
2985
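/*
 * Fill a compute MQD: static thread management masks, EOP buffer, pipe
 * queue base/size, rptr report and wptr poll addresses, doorbell and
 * priorities. The same layout serves both the KIQ and regular compute
 * queues.
 */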
2986 static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
2987                                       struct amdgpu_mqd_prop *prop)
2988 {
2989         struct v12_compute_mqd *mqd = m;
2990         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2991         uint32_t tmp;
2992
2993         mqd->header = 0xC0310800;
2994         mqd->compute_pipelinestat_enable = 0x00000001;
2995         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2996         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2997         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2998         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2999         mqd->compute_misc_reserved = 0x00000007;
3000
3001         eop_base_addr = prop->eop_gpu_addr >> 8;
3002         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3003         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3004
3005         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3006         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3007         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3008                         (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
3009
3010         mqd->cp_hqd_eop_control = tmp;
3011
3012         /* enable doorbell? */
3013         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3014
3015         if (prop->use_doorbell) {
3016                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3017                                     DOORBELL_OFFSET, prop->doorbell_index);
3018                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3019                                     DOORBELL_EN, 1);
3020                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3021                                     DOORBELL_SOURCE, 0);
3022                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3023                                     DOORBELL_HIT, 0);
3024         } else {
3025                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3026                                     DOORBELL_EN, 0);
3027         }
3028
3029         mqd->cp_hqd_pq_doorbell_control = tmp;
3030
3031         /* disable the queue if it's active */
3032         mqd->cp_hqd_dequeue_request = 0;
3033         mqd->cp_hqd_pq_rptr = 0;
3034         mqd->cp_hqd_pq_wptr_lo = 0;
3035         mqd->cp_hqd_pq_wptr_hi = 0;
3036
3037         /* set the pointer to the MQD */
3038         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3039         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3040
3041         /* set MQD vmid to 0 */
3042         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3043         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3044         mqd->cp_mqd_control = tmp;
3045
3046         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3047         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3048         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3049         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3050
3051         /* set up the HQD, this is similar to CP_RB0_CNTL */
3052         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3053         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3054                             (order_base_2(prop->queue_size / 4) - 1));
3055         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3056                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3057         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
3058         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3059         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3060         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3061         mqd->cp_hqd_pq_control = tmp;
3062
3063         /* set the wb address whether it's enabled or not */
3064         wb_gpu_addr = prop->rptr_gpu_addr;
3065         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3066         mqd->cp_hqd_pq_rptr_report_addr_hi =
3067                 upper_32_bits(wb_gpu_addr) & 0xffff;
3068
3069         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3070         wb_gpu_addr = prop->wptr_gpu_addr;
3071         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3072         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3073
3074         tmp = 0;
3075         /* enable the doorbell if requested */
3076         if (prop->use_doorbell) {
3077                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3078                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3079                                 DOORBELL_OFFSET, prop->doorbell_index);
3080
3081                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3082                                     DOORBELL_EN, 1);
3083                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3084                                     DOORBELL_SOURCE, 0);
3085                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3086                                     DOORBELL_HIT, 0);
3087         }
3088
3089         mqd->cp_hqd_pq_doorbell_control = tmp;
3090
3091         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3092         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3093
3094         /* set the vmid for the queue */
3095         mqd->cp_hqd_vmid = 0;
3096
3097         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3098         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3099         mqd->cp_hqd_persistent_state = tmp;
3100
3101         /* set MIN_IB_AVAIL_SIZE */
3102         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3103         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3104         mqd->cp_hqd_ib_control = tmp;
3105
3106         /* set static priority for a compute queue/ring */
3107         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3108         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3109
3110         mqd->cp_hqd_active = prop->hqd_active;
3111
3112         return 0;
3113 }
3114
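/*
 * Program the KIQ HQD registers directly from its MQD; only the KIQ is
 * brought up this way, ordinary gfx/compute queues are mapped through the
 * KIQ or MES instead.
 */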
3115 static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
3116 {
3117         struct amdgpu_device *adev = ring->adev;
3118         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3119         int j;
3120
3121         /* deactivate the queue */
3122         if (amdgpu_sriov_vf(adev))
3123                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3124
3125         /* disable wptr polling */
3126         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3127
3128         /* write the EOP addr */
3129         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3130                mqd->cp_hqd_eop_base_addr_lo);
3131         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3132                mqd->cp_hqd_eop_base_addr_hi);
3133
3134         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3135         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3136                mqd->cp_hqd_eop_control);
3137
3138         /* enable doorbell? */
3139         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3140                mqd->cp_hqd_pq_doorbell_control);
3141
3142         /* disable the queue if it's active */
3143         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3144                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3145                 for (j = 0; j < adev->usec_timeout; j++) {
3146                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3147                                 break;
3148                         udelay(1);
3149                 }
3150                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3151                        mqd->cp_hqd_dequeue_request);
3152                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3153                        mqd->cp_hqd_pq_rptr);
3154                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3155                        mqd->cp_hqd_pq_wptr_lo);
3156                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3157                        mqd->cp_hqd_pq_wptr_hi);
3158         }
3159
3160         /* set the pointer to the MQD */
3161         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3162                mqd->cp_mqd_base_addr_lo);
3163         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3164                mqd->cp_mqd_base_addr_hi);
3165
3166         /* set MQD vmid to 0 */
3167         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3168                mqd->cp_mqd_control);
3169
3170         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3171         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3172                mqd->cp_hqd_pq_base_lo);
3173         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3174                mqd->cp_hqd_pq_base_hi);
3175
3176         /* set up the HQD, this is similar to CP_RB0_CNTL */
3177         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3178                mqd->cp_hqd_pq_control);
3179
3180         /* set the wb address whether it's enabled or not */
3181         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3182                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3183         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3184                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3185
3186         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3187         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3188                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3189         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3190                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3191
3192         /* enable the doorbell if requested */
3193         if (ring->use_doorbell) {
3194                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3195                         (adev->doorbell_index.kiq * 2) << 2);
3196                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3197                         (adev->doorbell_index.userqueue_end * 2) << 2);
3198         }
3199
3200         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3201                mqd->cp_hqd_pq_doorbell_control);
3202
3203         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3204         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3205                mqd->cp_hqd_pq_wptr_lo);
3206         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3207                mqd->cp_hqd_pq_wptr_hi);
3208
3209         /* set the vmid for the queue */
3210         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3211
3212         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3213                mqd->cp_hqd_persistent_state);
3214
3215         /* activate the queue */
3216         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3217                mqd->cp_hqd_active);
3218
3219         if (ring->use_doorbell)
3220                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3221
3222         return 0;
3223 }
3224
3225 static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
3226 {
3227         struct amdgpu_device *adev = ring->adev;
3228         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3229         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3230
3231         gfx_v12_0_kiq_setting(ring);
3232
3233         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3234                 /* reset MQD to a clean status */
3235                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3236                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3237
3238                 /* reset ring buffer */
3239                 ring->wptr = 0;
3240                 amdgpu_ring_clear_ring(ring);
3241
3242                 mutex_lock(&adev->srbm_mutex);
3243                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3244                 gfx_v12_0_kiq_init_register(ring);
3245                 soc24_grbm_select(adev, 0, 0, 0, 0);
3246                 mutex_unlock(&adev->srbm_mutex);
3247         } else {
3248                 memset((void *)mqd, 0, sizeof(*mqd));
3249                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3250                         amdgpu_ring_clear_ring(ring);
3251                 mutex_lock(&adev->srbm_mutex);
3252                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3253                 amdgpu_ring_init_mqd(ring);
3254                 gfx_v12_0_kiq_init_register(ring);
3255                 soc24_grbm_select(adev, 0, 0, 0, 0);
3256                 mutex_unlock(&adev->srbm_mutex);
3257
3258                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3259                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3260         }
3261
3262         return 0;
3263 }
3264
3265 static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
3266 {
3267         struct amdgpu_device *adev = ring->adev;
3268         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3269         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3270
3271         if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
3272                 memset((void *)mqd, 0, sizeof(*mqd));
3273                 mutex_lock(&adev->srbm_mutex);
3274                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3275                 amdgpu_ring_init_mqd(ring);
3276                 soc24_grbm_select(adev, 0, 0, 0, 0);
3277                 mutex_unlock(&adev->srbm_mutex);
3278
3279                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3280                         memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3281         } else {
3282                 /* restore MQD to a clean status */
3283                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3284                         memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3285                 /* reset ring buffer */
3286                 ring->wptr = 0;
3287                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3288                 amdgpu_ring_clear_ring(ring);
3289         }
3290
3291         return 0;
3292 }
3293
3294 static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
3295 {
3296         struct amdgpu_ring *ring;
3297         int r;
3298
3299         ring = &adev->gfx.kiq[0].ring;
3300
3301         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3302         if (unlikely(r != 0))
3303                 return r;
3304
3305         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3306         if (unlikely(r != 0)) {
3307                 amdgpu_bo_unreserve(ring->mqd_obj);
3308                 return r;
3309         }
3310
3311         gfx_v12_0_kiq_init_queue(ring);
3312         amdgpu_bo_kunmap(ring->mqd_obj);
3313         ring->mqd_ptr = NULL;
3314         amdgpu_bo_unreserve(ring->mqd_obj);
3315         ring->sched.ready = true;
3316         return 0;
3317 }
3318
3319 static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
3320 {
3321         struct amdgpu_ring *ring = NULL;
3322         int r = 0, i;
3323
3324         if (!amdgpu_async_gfx_ring)
3325                 gfx_v12_0_cp_compute_enable(adev, true);
3326
3327         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3328                 ring = &adev->gfx.compute_ring[i];
3329
3330                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3331                 if (unlikely(r != 0))
3332                         goto done;
3333                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3334                 if (!r) {
3335                         r = gfx_v12_0_kcq_init_queue(ring, false);
3336                         amdgpu_bo_kunmap(ring->mqd_obj);
3337                         ring->mqd_ptr = NULL;
3338                 }
3339                 amdgpu_bo_unreserve(ring->mqd_obj);
3340                 if (r)
3341                         goto done;
3342         }
3343
3344         r = amdgpu_gfx_enable_kcq(adev, 0);
3345 done:
3346         return r;
3347 }
3348
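/*
 * Top-level CP bring-up: load the RS64 microcode when the legacy direct
 * load type is used, program the doorbell apertures, resume the KIQ (or
 * the MES KIQ), the compute queues and the gfx queues, then ring-test
 * every gfx and compute ring.
 */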
3349 static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
3350 {
3351         int r, i;
3352         struct amdgpu_ring *ring;
3353
3354         if (!(adev->flags & AMD_IS_APU))
3355                 gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3356
3357         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3358                 /* legacy firmware loading */
3359                 r = gfx_v12_0_cp_gfx_load_microcode(adev);
3360                 if (r)
3361                         return r;
3362
3363                 r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
3364                 if (r)
3365                         return r;
3366         }
3367
3368         gfx_v12_0_cp_set_doorbell_range(adev);
3369
3370         if (amdgpu_async_gfx_ring) {
3371                 gfx_v12_0_cp_compute_enable(adev, true);
3372                 gfx_v12_0_cp_gfx_enable(adev, true);
3373         }
3374
3375         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
3376                 r = amdgpu_mes_kiq_hw_init(adev);
3377         else
3378                 r = gfx_v12_0_kiq_resume(adev);
3379         if (r)
3380                 return r;
3381
3382         r = gfx_v12_0_kcq_resume(adev);
3383         if (r)
3384                 return r;
3385
3386         if (!amdgpu_async_gfx_ring) {
3387                 r = gfx_v12_0_cp_gfx_resume(adev);
3388                 if (r)
3389                         return r;
3390         } else {
3391                 r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
3392                 if (r)
3393                         return r;
3394         }
3395
3396         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3397                 ring = &adev->gfx.gfx_ring[i];
3398                 r = amdgpu_ring_test_helper(ring);
3399                 if (r)
3400                         return r;
3401         }
3402
3403         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3404                 ring = &adev->gfx.compute_ring[i];
3405                 r = amdgpu_ring_test_helper(ring);
3406                 if (r)
3407                         return r;
3408         }
3409
3410         return 0;
3411 }
3412
3413 static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
3414 {
3415         gfx_v12_0_cp_gfx_enable(adev, enable);
3416         gfx_v12_0_cp_compute_enable(adev, enable);
3417 }
3418
3419 static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
3420 {
3421         int r;
3422         bool value;
3423
3424         r = adev->gfxhub.funcs->gart_enable(adev);
3425         if (r)
3426                 return r;
3427
3428         adev->hdp.funcs->flush_hdp(adev, NULL);
3429
3430         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
3431                 false : true;
3432
3433         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
3434         /* TODO investigate why this and the hdp flush above are needed,
3435          * are we missing a flush somewhere else? */
3436         adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
3437
3438         return 0;
3439 }
3440
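/*
 * Decode GB_ADDR_CONFIG into the cached gfx config fields (packers, pipes,
 * RBs per SE, SEs, max compressed frags, pipe interleave size). A value of
 * zero is treated as "not yet programmed" and rejected.
 */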
3441 static int get_gb_addr_config(struct amdgpu_device *adev)
3442 {
3443         u32 gb_addr_config;
3444
3445         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
3446         if (gb_addr_config == 0)
3447                 return -EINVAL;
3448
3449         adev->gfx.config.gb_addr_config_fields.num_pkrs =
3450                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
3451
3452         adev->gfx.config.gb_addr_config = gb_addr_config;
3453
3454         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
3455                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3456                                       GB_ADDR_CONFIG, NUM_PIPES);
3457
3458         adev->gfx.config.max_tile_pipes =
3459                 adev->gfx.config.gb_addr_config_fields.num_pipes;
3460
3461         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
3462                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3463                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
3464         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
3465                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3466                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
3467         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
3468                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3469                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
3470         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
3471                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3472                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
3473
3474         return 0;
3475 }
3476
3477 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
3478 {
3479         uint32_t data;
3480
3481         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
3482         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
3483         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
3484
3485         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
3486         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
3487         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
3488 }
3489
3490 static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
3491 {
3492         if (amdgpu_sriov_vf(adev))
3493                 return;
3494
3495         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3496         case IP_VERSION(12, 0, 0):
3497         case IP_VERSION(12, 0, 1):
3498                 if (adev->rev_id == 0)
3499                         soc15_program_register_sequence(adev,
3500                                         golden_settings_gc_12_0,
3501                                         (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
3502                 break;
3503         default:
3504                 break;
3505         }
3506 }
3507
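/*
 * hw_init ordering: depending on the firmware load type, either program the
 * RLC RAM and trigger the RLC backdoor autoload, or load the IMU microcode
 * directly; then wait for autoload completion, apply golden settings,
 * enable the gfxhub/GART, resume the RLC and finally bring up the CP rings.
 */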
3508 static int gfx_v12_0_hw_init(void *handle)
3509 {
3510         int r;
3511         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3512
3513         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
3514                 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3515                         /* RLC autoload sequence 1: Program rlc ram */
3516                         if (adev->gfx.imu.funcs->program_rlc_ram)
3517                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
3518                 }
3519                 /* rlc autoload firmware */
3520                 r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
3521                 if (r)
3522                         return r;
3523         } else {
3524                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3525                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3526                                 if (adev->gfx.imu.funcs->load_microcode)
3527                                         adev->gfx.imu.funcs->load_microcode(adev);
3528                                 if (adev->gfx.imu.funcs->setup_imu)
3529                                         adev->gfx.imu.funcs->setup_imu(adev);
3530                                 if (adev->gfx.imu.funcs->start_imu)
3531                                         adev->gfx.imu.funcs->start_imu(adev);
3532                         }
3533
3534                         /* disable gpa mode in backdoor loading */
3535                         gfx_v12_0_disable_gpa_mode(adev);
3536                 }
3537         }
3538
3539         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
3540             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
3541                 r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
3542                 if (r) {
3543                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
3544                         return r;
3545                 }
3546         }
3547
3548         if (!amdgpu_emu_mode)
3549                 gfx_v12_0_init_golden_registers(adev);
3550
3551         adev->gfx.is_poweron = true;
3552
3553         if (get_gb_addr_config(adev))
3554                 DRM_WARN("Invalid gb_addr_config!\n");
3555
3556         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
3557                 gfx_v12_0_config_gfx_rs64(adev);
3558
3559         r = gfx_v12_0_gfxhub_enable(adev);
3560         if (r)
3561                 return r;
3562
3563         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
3564              adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
3565              (amdgpu_dpm == 1)) {
3566                 /*
3567                  * For gfx 12, RLC firmware loading relies on the SMU firmware
3568                  * being loaded first, so for the direct load type the SMC
3569                  * ucode has to be loaded here before the RLC.
3570                  */
3571                 r = amdgpu_pm_load_smu_firmware(adev, NULL);
3572                 if (r)
3573                         return r;
3574         }
3575
3576         gfx_v12_0_constants_init(adev);
3577
3578         if (adev->nbio.funcs->gc_doorbell_init)
3579                 adev->nbio.funcs->gc_doorbell_init(adev);
3580
3581         r = gfx_v12_0_rlc_resume(adev);
3582         if (r)
3583                 return r;
3584
3585         /*
3586          * init golden registers and rlc resume may override some registers,
3587          * reconfig them here
3588          */
3589         gfx_v12_0_tcp_harvest(adev);
3590
3591         r = gfx_v12_0_cp_resume(adev);
3592         if (r)
3593                 return r;
3594
3595         return r;
3596 }
3597
3598 static int gfx_v12_0_hw_fini(void *handle)
3599 {
3600         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3601         uint32_t tmp;
3602
3603         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3604         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3605         amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
3606
3607         if (!adev->no_hw_access) {
3608                 if (amdgpu_async_gfx_ring) {
3609                         if (amdgpu_gfx_disable_kgq(adev, 0))
3610                                 DRM_ERROR("KGQ disable failed\n");
3611                 }
3612
3613                 if (amdgpu_gfx_disable_kcq(adev, 0))
3614                         DRM_ERROR("KCQ disable failed\n");
3615
3616                 amdgpu_mes_kiq_hw_fini(adev);
3617         }
3618
3619         if (amdgpu_sriov_vf(adev)) {
3620                 gfx_v12_0_cp_gfx_enable(adev, false);
3621                 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
3622                 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3623                 tmp &= 0xffffff00;
3624                 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3625
3626                 return 0;
3627         }
3628         gfx_v12_0_cp_enable(adev, false);
3629         gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3630
3631         adev->gfxhub.funcs->gart_disable(adev);
3632
3633         adev->gfx.is_poweron = false;
3634
3635         return 0;
3636 }
3637
3638 static int gfx_v12_0_suspend(void *handle)
3639 {
3640         return gfx_v12_0_hw_fini(handle);
3641 }
3642
3643 static int gfx_v12_0_resume(void *handle)
3644 {
3645         return gfx_v12_0_hw_init(handle);
3646 }
3647
3648 static bool gfx_v12_0_is_idle(void *handle)
3649 {
3650         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3651
3652         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
3653                                 GRBM_STATUS, GUI_ACTIVE))
3654                 return false;
3655         else
3656                 return true;
3657 }
3658
3659 static int gfx_v12_0_wait_for_idle(void *handle)
3660 {
3661         unsigned i;
3662         u32 tmp;
3663         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3664
3665         for (i = 0; i < adev->usec_timeout; i++) {
3666                 /* read GRBM_STATUS */
3667                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
3668                         GRBM_STATUS__GUI_ACTIVE_MASK;
3669
3670                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3671                         return 0;
3672                 udelay(1);
3673         }
3674         return -ETIMEDOUT;
3675 }
3676
3677 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3678 {
3679         uint64_t clock = 0;
3680
3681         if (adev->smuio.funcs &&
3682             adev->smuio.funcs->get_gpu_clock_counter)
3683                 clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
3684         else
3685                 dev_warn(adev->dev, "query gpu clock counter is not supported\n");
3686
3687         return clock;
3688 }
3689
3690 static int gfx_v12_0_early_init(void *handle)
3691 {
3692         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3693
3694         adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
3695
3696         adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
3697         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
3698                                           AMDGPU_MAX_COMPUTE_RINGS);
3699
3700         gfx_v12_0_set_kiq_pm4_funcs(adev);
3701         gfx_v12_0_set_ring_funcs(adev);
3702         gfx_v12_0_set_irq_funcs(adev);
3703         gfx_v12_0_set_rlc_funcs(adev);
3704         gfx_v12_0_set_mqd_funcs(adev);
3705         gfx_v12_0_set_imu_funcs(adev);
3706
3707         gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
3708
3709         return gfx_v12_0_init_microcode(adev);
3710 }
3711
3712 static int gfx_v12_0_late_init(void *handle)
3713 {
3714         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3715         int r;
3716
3717         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3718         if (r)
3719                 return r;
3720
3721         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3722         if (r)
3723                 return r;
3724
3725         r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
3726         if (r)
3727                 return r;
3728
3729         return 0;
3730 }
3731
3732 static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
3733 {
3734         uint32_t rlc_cntl;
3735
3736         /* report whether the RLC is currently enabled */
3737         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
3738         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
3739 }
3740
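/*
 * Enter RLC safe mode: writing CMD with MESSAGE=1 requests entry and the
 * CMD bit is polled until the RLC clears it; unset_safe_mode below issues
 * the same handshake with MESSAGE=0 to leave safe mode, without waiting
 * for completion.
 */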
3741 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
3742                                     int xcc_id)
3743 {
3744         uint32_t data;
3745         unsigned i;
3746
3747         data = RLC_SAFE_MODE__CMD_MASK;
3748         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3749
3750         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
3751
3752         /* wait for RLC_SAFE_MODE */
3753         for (i = 0; i < adev->usec_timeout; i++) {
3754                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
3755                                    RLC_SAFE_MODE, CMD))
3756                         break;
3757                 udelay(1);
3758         }
3759 }
3760
3761 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
3762                                       int xcc_id)
3763 {
3764         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
3765 }
3766
3767 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
3768                                       bool enable)
3769 {
3770         uint32_t def, data;
3771
3772         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
3773                 return;
3774
3775         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3776
3777         if (enable)
3778                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3779         else
3780                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3781
3782         if (def != data)
3783                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3784 }
3785
3786 static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
3787                                       struct amdgpu_ring *ring,
3788                                       unsigned vmid)
3789 {
3790         u32 reg, data;
3791
3792         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
3793         if (amdgpu_sriov_is_pp_one_vf(adev))
3794                 data = RREG32_NO_KIQ(reg);
3795         else
3796                 data = RREG32(reg);
3797
3798         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
3799         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
3800
3801         if (amdgpu_sriov_is_pp_one_vf(adev))
3802                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
3803         else
3804                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
3805
3806         if (ring
3807             && amdgpu_sriov_is_pp_one_vf(adev)
3808             && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
3809                 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
3810                 uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
3811                 amdgpu_ring_emit_wreg(ring, reg, data);
3812         }
3813 }
3814
3815 static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
3816         .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
3817         .set_safe_mode = gfx_v12_0_set_safe_mode,
3818         .unset_safe_mode = gfx_v12_0_unset_safe_mode,
3819         .init = gfx_v12_0_rlc_init,
3820         .get_csb_size = gfx_v12_0_get_csb_size,
3821         .get_csb_buffer = gfx_v12_0_get_csb_buffer,
3822         .resume = gfx_v12_0_rlc_resume,
3823         .stop = gfx_v12_0_rlc_stop,
3824         .reset = gfx_v12_0_rlc_reset,
3825         .start = gfx_v12_0_rlc_start,
3826         .update_spm_vmid = gfx_v12_0_update_spm_vmid,
3827 };
3828
3829 #if 0
3830 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
3831 {
3832         /* TODO */
3833 }
3834
3835 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
3836 {
3837         /* TODO */
3838 }
3839 #endif
3840
3841 static int gfx_v12_0_set_powergating_state(void *handle,
3842                                            enum amd_powergating_state state)
3843 {
3844         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3845         bool enable = (state == AMD_PG_STATE_GATE);
3846
3847         if (amdgpu_sriov_vf(adev))
3848                 return 0;
3849
3850         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3851         case IP_VERSION(12, 0, 0):
3852         case IP_VERSION(12, 0, 1):
3853                 amdgpu_gfx_off_ctrl(adev, enable);
3854                 break;
3855         default:
3856                 break;
3857         }
3858
3859         return 0;
3860 }
3861
3862 static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3863                                                        bool enable)
3864 {
3865         uint32_t def, data;
3866
3867         if (!(adev->cg_flags &
3868               (AMD_CG_SUPPORT_GFX_CGCG |
3869               AMD_CG_SUPPORT_GFX_CGLS |
3870               AMD_CG_SUPPORT_GFX_3D_CGCG |
3871               AMD_CG_SUPPORT_GFX_3D_CGLS)))
3872                 return;
3873
3874         if (enable) {
3875                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3876
3877                 /* unset CGCG override */
3878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3879                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3881                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3882                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
3883                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3884                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3885
3886                 /* update CGCG override bits */
3887                 if (def != data)
3888                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3889
3890                 /* enable cgcg FSM(0x0000363F) */
3891                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3892
3893                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
3894                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
3895                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3896                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3897                 }
3898
3899                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
3900                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
3901                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3902                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3903                 }
3904
3905                 if (def != data)
3906                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3907
3908                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3909                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3910
3911                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
3912                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
3913                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3914                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3915                 }
3916
3917                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
3918                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
3919                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3920                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3921                 }
3922
3923                 if (def != data)
3924                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3925
3926                 /* set IDLE_POLL_COUNT(0x00900100) */
3927                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
3928
3929                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
3930                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3931                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3932
3933                 if (def != data)
3934                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
3935
3936                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
3937                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
3938                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
3939                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
3940                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
3941                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
3942
3943                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3944                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3945                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3946
3947                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
3948                 if (adev->sdma.num_instances > 1) {
3949                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3950                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3951                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3952                 }
3953         } else {
3954                 /* Program RLC_CGCG_CGLS_CTRL */
3955                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3956
3957                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3958                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3959
3960                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3961                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3962
3963                 if (def != data)
3964                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3965
3966                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3967                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3968
3969                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
3970                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3971                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3972                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3973
3974                 if (def != data)
3975                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3976
3977                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3978                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3979                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3980
3981                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
3982                 if (adev->sdma.num_instances > 1) {
3983                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3984                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3985                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3986                 }
3987         }
3988 }
3989
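/*
 * Enable/disable medium grain clock gating by clearing or setting the
 * GRBM/RLC SCLK and GFXIP MGCG override bits in RLC_CGTT_MGCG_OVERRIDE.
 */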
3990 static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3991                                                        bool enable)
3992 {
3993         uint32_t data, def;
3994         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
3995                 return;
3996
3997         /* It is disabled by HW by default */
3998         if (enable) {
3999                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4000                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4001                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4002
4003                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4004                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4005                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4006
4007                         if (def != data)
4008                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4009                 }
4010         } else {
4011                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4012                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4013
4014                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4015                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4016                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4017
4018                         if (def != data)
4019                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4020                 }
4021         }
4022 }
4023
4024 static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
4025                                            bool enable)
4026 {
4027         uint32_t def, data;
4028
4029         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4030                 return;
4031
4032         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4033
4034         if (enable)
4035                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
4036                                   RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
4037         else
4038                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
4039                                 RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
4040
4041         if (def != data)
4042                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4043 }
4044
4045 static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
4046                                        bool enable)
4047 {
4048         uint32_t def, data;
4049
4050         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4051                 return;
4052
4053         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4054
4055         if (enable)
4056                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4057         else
4058                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4059
4060         if (def != data)
4061                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4062 }
4063
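/*
 * Apply all GFX clock gating settings.  The RLC is held in safe mode
 * around the register updates, and the GUI idle interrupt is only
 * toggled when at least one of the CG features is supported.
 */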
4064 static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4065                                             bool enable)
4066 {
4067         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4068
4069         gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
4070
4071         gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
4072
4073         gfx_v12_0_update_repeater_fgcg(adev, enable);
4074
4075         gfx_v12_0_update_sram_fgcg(adev, enable);
4076
4077         gfx_v12_0_update_perf_clk(adev, enable);
4078
4079         if (adev->cg_flags &
4080             (AMD_CG_SUPPORT_GFX_MGCG |
4081              AMD_CG_SUPPORT_GFX_CGLS |
4082              AMD_CG_SUPPORT_GFX_CGCG |
4083              AMD_CG_SUPPORT_GFX_3D_CGCG |
4084              AMD_CG_SUPPORT_GFX_3D_CGLS))
4085                 gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
4086
4087         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4088
4089         return 0;
4090 }
4091
4092 static int gfx_v12_0_set_clockgating_state(void *handle,
4093                                            enum amd_clockgating_state state)
4094 {
4095         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4096
4097         if (amdgpu_sriov_vf(adev))
4098                 return 0;
4099
4100         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4101         case IP_VERSION(12, 0, 0):
4102         case IP_VERSION(12, 0, 1):
4103                 gfx_v12_0_update_gfx_clock_gating(adev,
4104                                                   state == AMD_CG_STATE_GATE);
4105                 break;
4106         default:
4107                 break;
4108         }
4109
4110         return 0;
4111 }
4112
4113 static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags)
4114 {
4115         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4116         int data;
4117
4118         /* AMD_CG_SUPPORT_GFX_MGCG */
4119         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4120         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4121                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4122
4123         /* AMD_CG_SUPPORT_REPEATER_FGCG */
4124         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
4125                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
4126
4127         /* AMD_CG_SUPPORT_GFX_FGCG */
4128         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
4129                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
4130
4131         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
4132         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
4133                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
4134
4135         /* AMD_CG_SUPPORT_GFX_CGCG */
4136         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4137         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4138                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4139
4140         /* AMD_CG_SUPPORT_GFX_CGLS */
4141         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4142                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4143
4144         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4145         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4146         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4147                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4148
4149         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4150         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4151                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4152 }
4153
4154 static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4155 {
4156         /* gfx12 hardware uses a 32-bit rptr */
4157         return *(uint32_t *)ring->rptr_cpu_addr;
4158 }
4159
4160 static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4161 {
4162         struct amdgpu_device *adev = ring->adev;
4163         u64 wptr;
4164
4165         /* XXX check if swapping is necessary on BE */
4166         if (ring->use_doorbell) {
4167                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4168         } else {
4169                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
4170                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
4171         }
4172
4173         return wptr;
4174 }
4175
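/*
 * Publish the new gfx write pointer.  For MES-managed queues the wptr is
 * mirrored next to the MQD and the (aggregated) doorbell is rung; for
 * kernel queues either the doorbell or the CP_RB0_WPTR registers are used.
 */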
4176 static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4177 {
4178         struct amdgpu_device *adev = ring->adev;
4179         uint32_t *wptr_saved;
4180         uint32_t *is_queue_unmap;
4181         uint64_t aggregated_db_index;
4182         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
4183         uint64_t wptr_tmp;
4184
4185         if (ring->is_mes_queue) {
4186                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4187                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4188                                               sizeof(uint32_t));
4189                 aggregated_db_index =
4190                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4191                                                                  ring->hw_prio);
4192
4193                 wptr_tmp = ring->wptr & ring->buf_mask;
4194                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4195                 *wptr_saved = wptr_tmp;
4196                 /* assume the doorbell is always used by an MES-mapped queue */
4197                 if (*is_queue_unmap) {
4198                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4199                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4200                 } else {
4201                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4202
4203                         if (*is_queue_unmap)
4204                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4205                 }
4206         } else {
4207                 if (ring->use_doorbell) {
4208                         /* XXX check if swapping is necessary on BE */
4209                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4210                                      ring->wptr);
4211                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4212                 } else {
4213                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
4214                                      lower_32_bits(ring->wptr));
4215                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
4216                                      upper_32_bits(ring->wptr));
4217                 }
4218         }
4219 }
4220
4221 static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4222 {
4223         /* gfx12 hardware uses a 32-bit rptr */
4224         return *(uint32_t *)ring->rptr_cpu_addr;
4225 }
4226
4227 static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4228 {
4229         u64 wptr;
4230
4231         /* XXX check if swapping is necessary on BE */
4232         if (ring->use_doorbell)
4233                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4234         else
4235                 BUG();
4236         return wptr;
4237 }
4238
4239 static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4240 {
4241         struct amdgpu_device *adev = ring->adev;
4242         uint32_t *wptr_saved;
4243         uint32_t *is_queue_unmap;
4244         uint64_t aggregated_db_index;
4245         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
4246         uint64_t wptr_tmp;
4247
4248         if (ring->is_mes_queue) {
4249                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4250                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4251                                               sizeof(uint32_t));
4252                 aggregated_db_index =
4253                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4254                                                                  ring->hw_prio);
4255
4256                 wptr_tmp = ring->wptr & ring->buf_mask;
4257                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4258                 *wptr_saved = wptr_tmp;
4259                 /* assume the doorbell is always used by an MES-mapped queue */
4260                 if (*is_queue_unmap) {
4261                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4262                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4263                 } else {
4264                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4265
4266                         if (*is_queue_unmap)
4267                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4268                 }
4269         } else {
4270                 /* XXX check if swapping is necessary on BE */
4271                 if (ring->use_doorbell) {
4272                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4273                                      ring->wptr);
4274                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4275                 } else {
4276                         BUG(); /* only DOORBELL method supported on gfx12 now */
4277                 }
4278         }
4279 }
4280
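/*
 * Emit an HDP flush for this ring: a WAIT_REG_MEM sequence against the
 * NBIO HDP flush request/done registers, using a per-pipe reference mask
 * for compute rings and the CP0 mask (PFP engine) for the gfx ring.
 */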
4281 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4282 {
4283         struct amdgpu_device *adev = ring->adev;
4284         u32 ref_and_mask, reg_mem_engine;
4285         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4286
4287         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4288                 switch (ring->me) {
4289                 case 1:
4290                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4291                         break;
4292                 case 2:
4293                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4294                         break;
4295                 default:
4296                         return;
4297                 }
4298                 reg_mem_engine = 0;
4299         } else {
4300                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4301                 reg_mem_engine = 1; /* pfp */
4302         }
4303
4304         gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4305                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4306                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4307                                ref_and_mask, ref_and_mask, 0x20);
4308 }
4309
4310 static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4311                                        struct amdgpu_job *job,
4312                                        struct amdgpu_ib *ib,
4313                                        uint32_t flags)
4314 {
4315         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4316         u32 header, control = 0;
4317
4318         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
4319
4320         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4321
4322         control |= ib->length_dw | (vmid << 24);
4323
4324         if (ring->is_mes_queue)
4325                 /* inherit vmid from mqd */
4326                 control |= 0x400000;
4327
4328         amdgpu_ring_write(ring, header);
4329         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4330         amdgpu_ring_write(ring,
4331 #ifdef __BIG_ENDIAN
4332                 (2 << 0) |
4333 #endif
4334                 lower_32_bits(ib->gpu_addr));
4335         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4336         amdgpu_ring_write(ring, control);
4337 }
4338
4339 static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4340                                            struct amdgpu_job *job,
4341                                            struct amdgpu_ib *ib,
4342                                            uint32_t flags)
4343 {
4344         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4345         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4346
4347         if (ring->is_mes_queue)
4348                 /* inherit vmid from mqd */
4349                 control |= 0x40000000;
4350
4351         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4352         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4353         amdgpu_ring_write(ring,
4354 #ifdef __BIG_ENDIAN
4355                                 (2 << 0) |
4356 #endif
4357                                 lower_32_bits(ib->gpu_addr));
4358         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4359         amdgpu_ring_write(ring, control);
4360 }
4361
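/*
 * Emit a fence using RELEASE_MEM: flush and write back GL2, then write
 * the 32- or 64-bit sequence number to the fence address and optionally
 * raise an interrupt.
 */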
4362 static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4363                                      u64 seq, unsigned flags)
4364 {
4365         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4366         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4367
4368         /* RELEASE_MEM - flush caches, send int */
4369         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4370         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
4371                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
4372                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
4373                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4374                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
4375         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
4376                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
4377
4378         /*
4379          * The address must be Qword aligned for a 64-bit write, and Dword
4380          * aligned when only the low 32-bit data word is sent (data high is discarded).
4381          */
4382         if (write64bit)
4383                 BUG_ON(addr & 0x7);
4384         else
4385                 BUG_ON(addr & 0x3);
4386         amdgpu_ring_write(ring, lower_32_bits(addr));
4387         amdgpu_ring_write(ring, upper_32_bits(addr));
4388         amdgpu_ring_write(ring, lower_32_bits(seq));
4389         amdgpu_ring_write(ring, upper_32_bits(seq));
4390         amdgpu_ring_write(ring, ring->is_mes_queue ?
4391                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
4392 }
4393
4394 static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4395 {
4396         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4397         uint32_t seq = ring->fence_drv.sync_seq;
4398         uint64_t addr = ring->fence_drv.gpu_addr;
4399
4400         gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
4401                                upper_32_bits(addr), seq, 0xffffffff, 4);
4402 }
4403
4404 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
4405                                    uint16_t pasid, uint32_t flush_type,
4406                                    bool all_hub, uint8_t dst_sel)
4407 {
4408         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
4409         amdgpu_ring_write(ring,
4410                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
4411                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
4412                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
4413                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
4414 }
4415
4416 static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4417                                          unsigned vmid, uint64_t pd_addr)
4418 {
4419         if (ring->is_mes_queue)
4420                 gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
4421         else
4422                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4423
4424         /* compute doesn't have PFP */
4425         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4426                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4427                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4428                 amdgpu_ring_write(ring, 0x0);
4429         }
4430 }
4431
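/*
 * KIQ fence: write the 32-bit sequence number with WRITE_DATA (only 32 bits
 * are allocated for the seq writeback) and, if requested, write
 * CPC_INT_STATUS to trigger the interrupt.
 */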
4432 static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4433                                           u64 seq, unsigned int flags)
4434 {
4435         struct amdgpu_device *adev = ring->adev;
4436
4437         /* we only allocate 32bit for each seq wb address */
4438         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4439
4440         /* write fence seq to the "addr" */
4441         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4442         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4443                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4444         amdgpu_ring_write(ring, lower_32_bits(addr));
4445         amdgpu_ring_write(ring, upper_32_bits(addr));
4446         amdgpu_ring_write(ring, lower_32_bits(seq));
4447
4448         if (flags & AMDGPU_FENCE_FLAG_INT) {
4449                 /* set register to trigger INT */
4450                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4451                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4452                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4453                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
4454                 amdgpu_ring_write(ring, 0);
4455                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4456         }
4457 }
4458
4459 static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
4460                                          uint32_t flags)
4461 {
4462         uint32_t dw2 = 0;
4463
4464         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4465         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4466                 /* set load_global_config & load_global_uconfig */
4467                 dw2 |= 0x8001;
4468                 /* set load_cs_sh_regs */
4469                 dw2 |= 0x01000000;
4470                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4471                 dw2 |= 0x10002;
4472         }
4473
4474         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4475         amdgpu_ring_write(ring, dw2);
4476         amdgpu_ring_write(ring, 0);
4477 }
4478
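/*
 * Emit a COND_EXEC packet that skips the following DWs when the value at
 * @addr is zero.  Returns the ring offset of the DW-count field so the
 * caller can patch in the real size later.
 */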
4479 static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
4480                                                    uint64_t addr)
4481 {
4482         unsigned ret;
4483
4484         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4485         amdgpu_ring_write(ring, lower_32_bits(addr));
4486         amdgpu_ring_write(ring, upper_32_bits(addr));
4487         /* discard following DWs if *cond_exec_gpu_addr==0 */
4488         amdgpu_ring_write(ring, 0);
4489         ret = ring->wptr & ring->buf_mask;
4490         /* patch dummy value later */
4491         amdgpu_ring_write(ring, 0);
4492
4493         return ret;
4494 }
4495
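/*
 * Preempt the IBs currently executing on @ring: clear the preemption
 * cond_exec, ask the KIQ to unmap the queue with PREEMPT_QUEUES_NO_UNMAP
 * and a trailing fence, then poll that fence until it signals or the
 * usec timeout expires.  Not supported when MES is enabled.
 */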
4496 static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
4497 {
4498         int i, r = 0;
4499         struct amdgpu_device *adev = ring->adev;
4500         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4501         struct amdgpu_ring *kiq_ring = &kiq->ring;
4502         unsigned long flags;
4503
4504         if (adev->enable_mes)
4505                 return -EINVAL;
4506
4507         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4508                 return -EINVAL;
4509
4510         spin_lock_irqsave(&kiq->ring_lock, flags);
4511
4512         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
4513                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4514                 return -ENOMEM;
4515         }
4516
4517         /* assert preemption condition */
4518         amdgpu_ring_set_preempt_cond_exec(ring, false);
4519
4520         /* assert IB preemption, emit the trailing fence */
4521         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
4522                                    ring->trail_fence_gpu_addr,
4523                                    ++ring->trail_seq);
4524         amdgpu_ring_commit(kiq_ring);
4525
4526         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4527
4528         /* poll the trailing fence */
4529         for (i = 0; i < adev->usec_timeout; i++) {
4530                 if (ring->trail_seq ==
4531                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
4532                         break;
4533                 udelay(1);
4534         }
4535
4536         if (i >= adev->usec_timeout) {
4537                 r = -EINVAL;
4538                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
4539         }
4540
4541         /* deassert preemption condition */
4542         amdgpu_ring_set_preempt_cond_exec(ring, true);
4543         return r;
4544 }
4545
4546 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
4547                                            bool start,
4548                                            bool secure)
4549 {
4550         uint32_t v = secure ? FRAME_TMZ : 0;
4551
4552         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4553         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
4554 }
4555
4556 static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
4557                                      uint32_t reg_val_offs)
4558 {
4559         struct amdgpu_device *adev = ring->adev;
4560
4561         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4562         amdgpu_ring_write(ring, 0 |     /* src: register*/
4563                                 (5 << 8) |      /* dst: memory */
4564                                 (1 << 20));     /* write confirm */
4565         amdgpu_ring_write(ring, reg);
4566         amdgpu_ring_write(ring, 0);
4567         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4568                                 reg_val_offs * 4));
4569         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4570                                 reg_val_offs * 4));
4571 }
4572
4573 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
4574                                      uint32_t reg,
4575                                      uint32_t val)
4576 {
4577         uint32_t cmd = 0;
4578
4579         switch (ring->funcs->type) {
4580         case AMDGPU_RING_TYPE_GFX:
4581                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4582                 break;
4583         case AMDGPU_RING_TYPE_KIQ:
4584                 cmd = (1 << 16); /* no inc addr */
4585                 break;
4586         default:
4587                 cmd = WR_CONFIRM;
4588                 break;
4589         }
4590         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4591         amdgpu_ring_write(ring, cmd);
4592         amdgpu_ring_write(ring, reg);
4593         amdgpu_ring_write(ring, 0);
4594         amdgpu_ring_write(ring, val);
4595 }
4596
4597 static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4598                                         uint32_t val, uint32_t mask)
4599 {
4600         gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4601 }
4602
4603 static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4604                                                    uint32_t reg0, uint32_t reg1,
4605                                                    uint32_t ref, uint32_t mask)
4606 {
4607         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4608
4609         gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4610                                ref, mask, 0x20);
4611 }
4612
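/*
 * Soft recovery: issue an SQ_CMD kill targeting the waves of the given
 * VMID, with the RLC held in safe mode around the register write.
 */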
4613 static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring,
4614                                          unsigned vmid)
4615 {
4616         struct amdgpu_device *adev = ring->adev;
4617         uint32_t value = 0;
4618
4619         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4620         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4621         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4622         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4623         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4624         WREG32_SOC15(GC, 0, regSQ_CMD, value);
4625         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4626 }
4627
4628 static void
4629 gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4630                                       uint32_t me, uint32_t pipe,
4631                                       enum amdgpu_interrupt_state state)
4632 {
4633         uint32_t cp_int_cntl, cp_int_cntl_reg;
4634
4635         if (!me) {
4636                 switch (pipe) {
4637                 case 0:
4638                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
4639                         break;
4640                 default:
4641                         DRM_DEBUG("invalid pipe %d\n", pipe);
4642                         return;
4643                 }
4644         } else {
4645                 DRM_DEBUG("invalid me %d\n", me);
4646                 return;
4647         }
4648
4649         switch (state) {
4650         case AMDGPU_IRQ_STATE_DISABLE:
4651                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4652                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4653                                             TIME_STAMP_INT_ENABLE, 0);
4654                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4655                                             GENERIC0_INT_ENABLE, 0);
4656                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4657                 break;
4658         case AMDGPU_IRQ_STATE_ENABLE:
4659                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4660                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4661                                             TIME_STAMP_INT_ENABLE, 1);
4662                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4663                                             GENERIC0_INT_ENABLE, 1);
4664                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4665                 break;
4666         default:
4667                 break;
4668         }
4669 }
4670
4671 static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4672                                                      int me, int pipe,
4673                                                      enum amdgpu_interrupt_state state)
4674 {
4675         u32 mec_int_cntl, mec_int_cntl_reg;
4676
4677         /*
4678          * amdgpu controls only the first MEC. That's why this function only
4679          * handles the setting of interrupts for this specific MEC. All other
4680          * pipes' interrupts are set by amdkfd.
4681          */
4682
4683         if (me == 1) {
4684                 switch (pipe) {
4685                 case 0:
4686                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
4687                         break;
4688                 case 1:
4689                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
4690                         break;
4691                 default:
4692                         DRM_DEBUG("invalid pipe %d\n", pipe);
4693                         return;
4694                 }
4695         } else {
4696                 DRM_DEBUG("invalid me %d\n", me);
4697                 return;
4698         }
4699
4700         switch (state) {
4701         case AMDGPU_IRQ_STATE_DISABLE:
4702                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4703                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4704                                              TIME_STAMP_INT_ENABLE, 0);
4705                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4706                                              GENERIC0_INT_ENABLE, 0);
4707                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4708                 break;
4709         case AMDGPU_IRQ_STATE_ENABLE:
4710                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4711                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4712                                              TIME_STAMP_INT_ENABLE, 1);
4713                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4714                                              GENERIC0_INT_ENABLE, 1);
4715                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4716                 break;
4717         default:
4718                 break;
4719         }
4720 }
4721
4722 static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4723                                             struct amdgpu_irq_src *src,
4724                                             unsigned type,
4725                                             enum amdgpu_interrupt_state state)
4726 {
4727         switch (type) {
4728         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4729                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
4730                 break;
4731         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
4732                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
4733                 break;
4734         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4735                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4736                 break;
4737         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4738                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4739                 break;
4740         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4741                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4742                 break;
4743         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4744                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4745                 break;
4746         default:
4747                 break;
4748         }
4749         return 0;
4750 }
4751
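/*
 * EOP interrupt handler.  MES-owned queues are looked up by the queue id
 * carried in src_data; legacy queues are matched by decoding me/pipe/queue
 * from ring_id, and the matching ring's fences are processed.
 */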
4752 static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
4753                              struct amdgpu_irq_src *source,
4754                              struct amdgpu_iv_entry *entry)
4755 {
4756         int i;
4757         u8 me_id, pipe_id, queue_id;
4758         struct amdgpu_ring *ring;
4759         uint32_t mes_queue_id = entry->src_data[0];
4760
4761         DRM_DEBUG("IH: CP EOP\n");
4762
4763         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
4764                 struct amdgpu_mes_queue *queue;
4765
4766                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
4767
4768                 spin_lock(&adev->mes.queue_id_lock);
4769                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
4770                 if (queue) {
4771                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
4772                         amdgpu_fence_process(queue->ring);
4773                 }
4774                 spin_unlock(&adev->mes.queue_id_lock);
4775         } else {
4776                 me_id = (entry->ring_id & 0x0c) >> 2;
4777                 pipe_id = (entry->ring_id & 0x03) >> 0;
4778                 queue_id = (entry->ring_id & 0x70) >> 4;
4779
4780                 switch (me_id) {
4781                 case 0:
4782                         if (pipe_id == 0)
4783                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4784                         else
4785                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
4786                         break;
4787                 case 1:
4788                 case 2:
4789                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4790                                 ring = &adev->gfx.compute_ring[i];
4791                                 /* Per-queue interrupt is supported for MEC starting from VI.
4792                                  * The interrupt can only be enabled/disabled per pipe instead
4793                                  * of per queue.
4794                                  */
4795                                 if ((ring->me == me_id) &&
4796                                     (ring->pipe == pipe_id) &&
4797                                     (ring->queue == queue_id))
4798                                         amdgpu_fence_process(ring);
4799                         }
4800                         break;
4801                 }
4802         }
4803
4804         return 0;
4805 }
4806
4807 static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4808                                               struct amdgpu_irq_src *source,
4809                                               unsigned int type,
4810                                               enum amdgpu_interrupt_state state)
4811 {
4812         u32 cp_int_cntl_reg, cp_int_cntl;
4813         int i, j;
4814
4815         switch (state) {
4816         case AMDGPU_IRQ_STATE_DISABLE:
4817         case AMDGPU_IRQ_STATE_ENABLE:
4818                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4819                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4820                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4821
4822                                 if (cp_int_cntl_reg) {
4823                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4824                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4825                                                                     PRIV_REG_INT_ENABLE,
4826                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4827                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4828                                 }
4829                         }
4830                 }
4831                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4832                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4833                                 /* MECs start at 1 */
4834                                 cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
4835
4836                                 if (cp_int_cntl_reg) {
4837                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4838                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4839                                                                     PRIV_REG_INT_ENABLE,
4840                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4841                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4842                                 }
4843                         }
4844                 }
4845                 break;
4846         default:
4847                 break;
4848         }
4849
4850         return 0;
4851 }
4852
4853 static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
4854                                             struct amdgpu_irq_src *source,
4855                                             unsigned type,
4856                                             enum amdgpu_interrupt_state state)
4857 {
4858         u32 cp_int_cntl_reg, cp_int_cntl;
4859         int i, j;
4860
4861         switch (state) {
4862         case AMDGPU_IRQ_STATE_DISABLE:
4863         case AMDGPU_IRQ_STATE_ENABLE:
4864                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4865                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4866                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4867
4868                                 if (cp_int_cntl_reg) {
4869                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4870                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4871                                                                     OPCODE_ERROR_INT_ENABLE,
4872                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4873                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4874                                 }
4875                         }
4876                 }
4877                 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4878                         for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4879                                 /* MECs start at 1 */
4880                                 cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
4881
4882                                 if (cp_int_cntl_reg) {
4883                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4884                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4885                                                                     OPCODE_ERROR_INT_ENABLE,
4886                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4887                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4888                                 }
4889                         }
4890                 }
4891                 break;
4892         default:
4893                 break;
4894         }
4895         return 0;
4896 }
4897
4898 static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4899                                                struct amdgpu_irq_src *source,
4900                                                unsigned int type,
4901                                                enum amdgpu_interrupt_state state)
4902 {
4903         u32 cp_int_cntl_reg, cp_int_cntl;
4904         int i, j;
4905
4906         switch (state) {
4907         case AMDGPU_IRQ_STATE_DISABLE:
4908         case AMDGPU_IRQ_STATE_ENABLE:
4909                 for (i = 0; i < adev->gfx.me.num_me; i++) {
4910                         for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4911                                 cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
4912
4913                                 if (cp_int_cntl_reg) {
4914                                         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4915                                         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4916                                                                     PRIV_INSTR_INT_ENABLE,
4917                                                                     state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4918                                         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4919                                 }
4920                         }
4921                 }
4922                 break;
4923         default:
4924                 break;
4925         }
4926
4927         return 0;
4928 }
4929
4930 static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
4931                                         struct amdgpu_iv_entry *entry)
4932 {
4933         u8 me_id, pipe_id, queue_id;
4934         struct amdgpu_ring *ring;
4935         int i;
4936
4937         me_id = (entry->ring_id & 0x0c) >> 2;
4938         pipe_id = (entry->ring_id & 0x03) >> 0;
4939         queue_id = (entry->ring_id & 0x70) >> 4;
4940
4941         switch (me_id) {
4942         case 0:
4943                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4944                         ring = &adev->gfx.gfx_ring[i];
4945                         if (ring->me == me_id && ring->pipe == pipe_id &&
4946                             ring->queue == queue_id)
4947                                 drm_sched_fault(&ring->sched);
4948                 }
4949                 break;
4950         case 1:
4951         case 2:
4952                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4953                         ring = &adev->gfx.compute_ring[i];
4954                         if (ring->me == me_id && ring->pipe == pipe_id &&
4955                             ring->queue == queue_id)
4956                                 drm_sched_fault(&ring->sched);
4957                 }
4958                 break;
4959         default:
4960                 BUG();
4961                 break;
4962         }
4963 }
4964
4965 static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
4966                                   struct amdgpu_irq_src *source,
4967                                   struct amdgpu_iv_entry *entry)
4968 {
4969         DRM_ERROR("Illegal register access in command stream\n");
4970         gfx_v12_0_handle_priv_fault(adev, entry);
4971         return 0;
4972 }
4973
4974 static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
4975                                 struct amdgpu_irq_src *source,
4976                                 struct amdgpu_iv_entry *entry)
4977 {
4978         DRM_ERROR("Illegal opcode in command stream\n");
4979         gfx_v12_0_handle_priv_fault(adev, entry);
4980         return 0;
4981 }
4982
4983 static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
4984                                    struct amdgpu_irq_src *source,
4985                                    struct amdgpu_iv_entry *entry)
4986 {
4987         DRM_ERROR("Illegal instruction in command stream\n");
4988         gfx_v12_0_handle_priv_fault(adev, entry);
4989         return 0;
4990 }
4991
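/*
 * Emit an ACQUIRE_MEM covering the full address range that invalidates
 * (and writes back where applicable) the GL2/GLM/GL1/GLV/GLK/GLI caches.
 */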
4992 static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
4993 {
4994         const unsigned int gcr_cntl =
4995                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
4996                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
4997                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
4998                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
4999                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
5000                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
5001                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
5002                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
5003
5004         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
5005         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
5006         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
5007         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
5008         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
5009         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
5010         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
5011         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
5012         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
5013 }
5014
5015 static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
5016 {
5017         int i;
5018
5019         /* Header itself is a NOP packet */
5020         if (num_nop == 1) {
5021                 amdgpu_ring_write(ring, ring->funcs->nop);
5022                 return;
5023         }
5024
5025         /* Max HW optimization covers up to 0x3ffe dwords; the remainder is emitted one NOP at a time */
5026         amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
5027
5028         /* Header is at index 0, followed by num_nop - 1 NOP packets */
5029         for (i = 1; i < num_nop; i++)
5030                 amdgpu_ring_write(ring, ring->funcs->nop);
5031 }
5032
5033 static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
5034 {
5035         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5036         uint32_t i, j, k, reg, index = 0;
5037         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
5038
5039         if (!adev->gfx.ip_dump_core)
5040                 return;
5041
5042         for (i = 0; i < reg_count; i++)
5043                 drm_printf(p, "%-50s \t 0x%08x\n",
5044                            gc_reg_list_12_0[i].reg_name,
5045                            adev->gfx.ip_dump_core[i]);
5046
5047         /* print compute queue registers for all instances */
5048         if (!adev->gfx.ip_dump_compute_queues)
5049                 return;
5050
5051         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
5052         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
5053                    adev->gfx.mec.num_mec,
5054                    adev->gfx.mec.num_pipe_per_mec,
5055                    adev->gfx.mec.num_queue_per_pipe);
5056
5057         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
5058                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
5059                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
5060                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
5061                                 for (reg = 0; reg < reg_count; reg++) {
5062                                         drm_printf(p, "%-50s \t 0x%08x\n",
5063                                                    gc_cp_reg_list_12[reg].reg_name,
5064                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
5065                                 }
5066                                 index += reg_count;
5067                         }
5068                 }
5069         }
5070
5071         /* print gfx queue registers for all instances */
5072         if (!adev->gfx.ip_dump_gfx_queues)
5073                 return;
5074
5075         index = 0;
5076         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
5077         drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
5078                    adev->gfx.me.num_me,
5079                    adev->gfx.me.num_pipe_per_me,
5080                    adev->gfx.me.num_queue_per_pipe);
5081
5082         for (i = 0; i < adev->gfx.me.num_me; i++) {
5083                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
5084                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
5085                                 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
5086                                 for (reg = 0; reg < reg_count; reg++) {
5087                                         drm_printf(p, "%-50s \t 0x%08x\n",
5088                                                    gc_gfx_queue_reg_list_12[reg].reg_name,
5089                                                    adev->gfx.ip_dump_gfx_queues[index + reg]);
5090                                 }
5091                                 index += reg_count;
5092                         }
5093                 }
5094         }
5095 }
5096
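/*
 * Snapshot the GC registers for later printing: core registers first,
 * then the per-queue compute and gfx registers selected via GRBM.
 * GFXOFF is disabled around the register reads.
 */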
5097 static void gfx_v12_ip_dump(void *handle)
5098 {
5099         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5100         uint32_t i, j, k, reg, index = 0;
5101         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
5102
5103         if (!adev->gfx.ip_dump_core)
5104                 return;
5105
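             /* keep GFXOFF disabled while sampling GC registers; reads can hang or
              * return invalid data if the block is power gated (assumption based on
              * the gfx_off_ctrl bracketing used elsewhere in this driver)
              */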
5106         amdgpu_gfx_off_ctrl(adev, false);
5107         for (i = 0; i < reg_count; i++)
5108                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
5109         amdgpu_gfx_off_ctrl(adev, true);
5110
5111         /* dump compute queue registers for all instances */
5112         if (!adev->gfx.ip_dump_compute_queues)
5113                 return;
5114
5115         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
5116         amdgpu_gfx_off_ctrl(adev, false);
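             /* srbm_mutex serializes GRBM me/pipe/queue selection so nothing else
              * changes the selected instance while the per-queue registers are read
              */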
5117         mutex_lock(&adev->srbm_mutex);
5118         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
5119                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
5120                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
5121                                 /* MEs 0..num_me-1 belong to GFX, so compute (MEC) selection starts after them */
5122                                 soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
5123                                 for (reg = 0; reg < reg_count; reg++) {
5124                                         adev->gfx.ip_dump_compute_queues[index + reg] =
5125                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
5126                                                         gc_cp_reg_list_12[reg]));
5127                                 }
5128                                 index += reg_count;
5129                         }
5130                 }
5131         }
5132         soc24_grbm_select(adev, 0, 0, 0, 0);
5133         mutex_unlock(&adev->srbm_mutex);
5134         amdgpu_gfx_off_ctrl(adev, true);
5135
5136         /* dump gfx queue registers for all instances */
5137         if (!adev->gfx.ip_dump_gfx_queues)
5138                 return;
5139
5140         index = 0;
5141         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
5142         amdgpu_gfx_off_ctrl(adev, false);
5143         mutex_lock(&adev->srbm_mutex);
5144         for (i = 0; i < adev->gfx.me.num_me; i++) {
5145                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
5146                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
5147                                 soc24_grbm_select(adev, i, j, k, 0);
5148
5149                                 for (reg = 0; reg < reg_count; reg++) {
5150                                         adev->gfx.ip_dump_gfx_queues[index + reg] =
5151                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
5152                                                         gc_gfx_queue_reg_list_12[reg]));
5153                                 }
5154                                 index += reg_count;
5155                         }
5156                 }
5157         }
5158         soc24_grbm_select(adev, 0, 0, 0, 0);
5159         mutex_unlock(&adev->srbm_mutex);
5160         amdgpu_gfx_off_ctrl(adev, true);
5161 }
5162
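     /*
      * Reset a hung gfx kernel queue: ask MES to reset the legacy queue, rebuild
      * the MQD, remap the queue through MES, then verify the ring with a ring test.
      */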
5163 static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
5164 {
5165         struct amdgpu_device *adev = ring->adev;
5166         int r;
5167
5168         if (amdgpu_sriov_vf(adev))
5169                 return -EINVAL;
5170
5171         r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
5172         if (r) {
5173                 dev_err(adev->dev, "reset via MES failed %d\n", r);
5174                 return r;
5175         }
5176
5177         r = amdgpu_bo_reserve(ring->mqd_obj, false);
5178         if (unlikely(r != 0)) {
5179                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
5180                 return r;
5181         }
5182         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5183         if (!r) {
5184                 r = gfx_v12_0_kgq_init_queue(ring, true);
5185                 amdgpu_bo_kunmap(ring->mqd_obj);
5186                 ring->mqd_ptr = NULL;
5187         }
5188         amdgpu_bo_unreserve(ring->mqd_obj);
5189         if (r) {
5190                 dev_err(adev->dev, "failed to map or init kgq mqd\n");
5191                 return r;
5192         }
5193
5194         r = amdgpu_mes_map_legacy_queue(adev, ring);
5195         if (r) {
5196                 dev_err(adev->dev, "failed to remap kgq\n");
5197                 return r;
5198         }
5199
5200         return amdgpu_ring_test_ring(ring);
5201 }
5202
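     /*
      * Reset a hung compute kernel queue: dequeue and reset the HQD via
      * GRBM-selected register writes, wait for CP_HQD_ACTIVE to clear, rebuild
      * the MQD, remap the queue through MES, then verify with a ring test.
      */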
5203 static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
5204 {
5205         struct amdgpu_device *adev = ring->adev;
5206         int r, i;
5207
5208         if (amdgpu_sriov_vf(adev))
5209                 return -EINVAL;
5210
5211         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5212         mutex_lock(&adev->srbm_mutex);
5213         soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
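             /* writing 0x2 is taken here to request a reset-type dequeue of the
              * active HQD before poking SPI_COMPUTE_QUEUE_RESET; the exact field
              * semantics are an assumption drawn from other CP queue-reset paths
              */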
5214         WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
5215         WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
5216         for (i = 0; i < adev->usec_timeout; i++) {
5217                 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
5218                         break;
5219                 udelay(1);
5220         }
5221         soc24_grbm_select(adev, 0, 0, 0, 0);
5222         mutex_unlock(&adev->srbm_mutex);
5223         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5224
5225         r = amdgpu_bo_reserve(ring->mqd_obj, false);
5226         if (unlikely(r != 0)) {
5227                 dev_err(adev->dev, "failed to reserve mqd_obj\n");
5228                 return r;
5229         }
5230         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5231         if (!r) {
5232                 r = gfx_v12_0_kcq_init_queue(ring, true);
5233                 amdgpu_bo_kunmap(ring->mqd_obj);
5234                 ring->mqd_ptr = NULL;
5235         }
5236         amdgpu_bo_unreserve(ring->mqd_obj);
5237         if (r) {
5238                 dev_err(adev->dev, "failed to map or init kcq mqd\n");
5239                 return r;
5240         }
5241         r = amdgpu_mes_map_legacy_queue(adev, ring);
5242         if (r) {
5243                 dev_err(adev->dev, "failed to remap kcq\n");
5244                 return r;
5245         }
5246
5247         return amdgpu_ring_test_ring(ring);
5248 }
5249
5250 static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
5251         .name = "gfx_v12_0",
5252         .early_init = gfx_v12_0_early_init,
5253         .late_init = gfx_v12_0_late_init,
5254         .sw_init = gfx_v12_0_sw_init,
5255         .sw_fini = gfx_v12_0_sw_fini,
5256         .hw_init = gfx_v12_0_hw_init,
5257         .hw_fini = gfx_v12_0_hw_fini,
5258         .suspend = gfx_v12_0_suspend,
5259         .resume = gfx_v12_0_resume,
5260         .is_idle = gfx_v12_0_is_idle,
5261         .wait_for_idle = gfx_v12_0_wait_for_idle,
5262         .set_clockgating_state = gfx_v12_0_set_clockgating_state,
5263         .set_powergating_state = gfx_v12_0_set_powergating_state,
5264         .get_clockgating_state = gfx_v12_0_get_clockgating_state,
5265         .dump_ip_state = gfx_v12_ip_dump,
5266         .print_ip_state = gfx_v12_ip_print,
5267 };
5268
5269 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
5270         .type = AMDGPU_RING_TYPE_GFX,
5271         .align_mask = 0xff,
5272         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5273         .support_64bit_ptrs = true,
5274         .secure_submission_supported = true,
5275         .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
5276         .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
5277         .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
5278         .emit_frame_size = /* 242 dwords maximum with 16 IBs */
5279                 5 + /* COND_EXEC */
5280                 7 + /* PIPELINE_SYNC */
5281                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5282                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5283                 2 + /* VM_FLUSH */
5284                 8 + /* FENCE for VM_FLUSH */
5285                 5 + /* COND_EXEC */
5286                 7 + /* HDP_flush */
5287                 4 + /* VGT_flush */
5288                 31 + /* DE_META */
5289                 3 + /* CNTX_CTRL */
5290                 5 + /* HDP_INVL */
5291                 8 + 8 + /* FENCE x2 */
5292                 8, /* gfx_v12_0_emit_mem_sync */
5293         .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
5294         .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
5295         .emit_fence = gfx_v12_0_ring_emit_fence,
5296         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5297         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5298         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5299         .test_ring = gfx_v12_0_ring_test_ring,
5300         .test_ib = gfx_v12_0_ring_test_ib,
5301         .insert_nop = gfx_v12_ring_insert_nop,
5302         .pad_ib = amdgpu_ring_generic_pad_ib,
5303         .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
5304         .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
5305         .preempt_ib = gfx_v12_0_ring_preempt_ib,
5306         .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
5307         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5308         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5309         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5310         .soft_recovery = gfx_v12_0_ring_soft_recovery,
5311         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5312         .reset = gfx_v12_0_reset_kgq,
5313 };
5314
5315 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
5316         .type = AMDGPU_RING_TYPE_COMPUTE,
5317         .align_mask = 0xff,
5318         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5319         .support_64bit_ptrs = true,
5320         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5321         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5322         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5323         .emit_frame_size =
5324                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5325                 5 + /* hdp invalidate */
5326                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5327                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5328                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5329                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5330                 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
5331                 8, /* gfx_v12_0_emit_mem_sync */
5332         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5333         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5334         .emit_fence = gfx_v12_0_ring_emit_fence,
5335         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5336         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5337         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5338         .test_ring = gfx_v12_0_ring_test_ring,
5339         .test_ib = gfx_v12_0_ring_test_ib,
5340         .insert_nop = gfx_v12_ring_insert_nop,
5341         .pad_ib = amdgpu_ring_generic_pad_ib,
5342         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5343         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5344         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5345         .soft_recovery = gfx_v12_0_ring_soft_recovery,
5346         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5347         .reset = gfx_v12_0_reset_kcq,
5348 };
5349
5350 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
5351         .type = AMDGPU_RING_TYPE_KIQ,
5352         .align_mask = 0xff,
5353         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5354         .support_64bit_ptrs = true,
5355         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5356         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5357         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5358         .emit_frame_size =
5359                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5360                 5 + /* hdp invalidate */
5361                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5362                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5363                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5364                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5365                 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5366         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5367         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5368         .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
5369         .test_ring = gfx_v12_0_ring_test_ring,
5370         .test_ib = gfx_v12_0_ring_test_ib,
5371         .insert_nop = amdgpu_ring_insert_nop,
5372         .pad_ib = amdgpu_ring_generic_pad_ib,
5373         .emit_rreg = gfx_v12_0_ring_emit_rreg,
5374         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5375         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5376         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5377 };
5378
5379 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
5380 {
5381         int i;
5382
5383         adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
5384
5385         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5386                 adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
5387
5388         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5389                 adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
5390 }
5391
5392 static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
5393         .set = gfx_v12_0_set_eop_interrupt_state,
5394         .process = gfx_v12_0_eop_irq,
5395 };
5396
5397 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
5398         .set = gfx_v12_0_set_priv_reg_fault_state,
5399         .process = gfx_v12_0_priv_reg_irq,
5400 };
5401
5402 static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = {
5403         .set = gfx_v12_0_set_bad_op_fault_state,
5404         .process = gfx_v12_0_bad_op_irq,
5405 };
5406
5407 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
5408         .set = gfx_v12_0_set_priv_inst_fault_state,
5409         .process = gfx_v12_0_priv_inst_irq,
5410 };
5411
5412 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
5413 {
5414         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5415         adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
5416
5417         adev->gfx.priv_reg_irq.num_types = 1;
5418         adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
5419
5420         adev->gfx.bad_op_irq.num_types = 1;
5421         adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
5422
5423         adev->gfx.priv_inst_irq.num_types = 1;
5424         adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
5425 }
5426
5427 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
5428 {
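             /* mission mode vs. debug mode follows the imu_v12_0 firmware naming;
              * as the check below shows, the split here depends only on the APU flag
              */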
5429         if (adev->flags & AMD_IS_APU)
5430                 adev->gfx.imu.mode = MISSION_MODE;
5431         else
5432                 adev->gfx.imu.mode = DEBUG_MODE;
5433
5434         adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
5435 }
5436
5437 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
5438 {
5439         adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
5440 }
5441
5442 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
5443 {
5444         /* set gfx eng mqd */
5445         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
5446                 sizeof(struct v12_gfx_mqd);
5447         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
5448                 gfx_v12_0_gfx_mqd_init;
5449         /* set compute eng mqd */
5450         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
5451                 sizeof(struct v12_compute_mqd);
5452         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
5453                 gfx_v12_0_compute_mqd_init;
5454 }
5455
5456 static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
5457                                                           u32 bitmap)
5458 {
5459         u32 data;
5460
5461         if (!bitmap)
5462                 return;
5463
5464         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5465         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5466
5467         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
5468 }
5469
5470 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
5471 {
5472         u32 data, wgp_bitmask;
5473         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
5474         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
5475
5476         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5477         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5478
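             /* a WGP packs two CUs, so the per-SH WGP count is half of max_cu_per_sh */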
5479         wgp_bitmask =
5480                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
5481
5482         return (~data) & wgp_bitmask;
5483 }
5484
5485 static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
5486 {
5487         u32 wgp_idx, wgp_active_bitmap;
5488         u32 cu_bitmap_per_wgp, cu_active_bitmap;
5489
5490         wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
5491         cu_active_bitmap = 0;
5492
5493         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
5494                 /* each WGP that is enabled brings 2 CUs with it */
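                     /* e.g. WGP 1 active -> CU bits 2 and 3 set (mask 0xc) */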
5495                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
5496                 if (wgp_active_bitmap & (1 << wgp_idx))
5497                         cu_active_bitmap |= cu_bitmap_per_wgp;
5498         }
5499
5500         return cu_active_bitmap;
5501 }
5502
5503 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
5504                                  struct amdgpu_cu_info *cu_info)
5505 {
5506         int i, j, k, counter, active_cu_number = 0;
5507         u32 mask, bitmap;
5508         unsigned disable_masks[8 * 2];
5509
5510         if (!adev || !cu_info)
5511                 return -EINVAL;
5512
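             /* pull the user-supplied amdgpu.disable_cu masks for up to 8 SEs x 2 SHs */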
5513         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
5514
5515         mutex_lock(&adev->grbm_idx_mutex);
5516         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5517                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5518                         bitmap = i * adev->gfx.config.max_sh_per_se + j;
5519                         if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
5520                                 continue;
5521                         mask = 1;
5522                         counter = 0;
5523                         gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
5524                         if (i < 8 && j < 2)
5525                                 gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
5526                                         adev, disable_masks[i * 2 + j]);
5527                         bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
5528
5529                         /*
5530                          * GFX12 may support more than 4 SEs, while the bitmap
5531                          * in the cu_info struct is 4x4 and the ioctl interface struct
5532                          * drm_amdgpu_info_device must stay stable.
5533                          * So the last two columns of the bitmap store the CU mask for
5534                          * SEs 4 to 7; the layout of the bitmap is as below:
5535                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
5536                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
5537                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
5538                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
5539                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
5540                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
5541                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
5542                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
5543                          */
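                             /* e.g. SE5/SH1 (i = 5, j = 1) lands in bitmap[0][1][3] */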
5544                         cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
5545
5546                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5547                                 if (bitmap & mask)
5548                                         counter++;
5549
5550                                 mask <<= 1;
5551                         }
5552                         active_cu_number += counter;
5553                 }
5554         }
5555         gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5556         mutex_unlock(&adev->grbm_idx_mutex);
5557
5558         cu_info->number = active_cu_number;
5559         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5560
5561         return 0;
5562 }
5563
5564 const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
5565         .type = AMD_IP_BLOCK_TYPE_GFX,
5566         .major = 12,
5567         .minor = 0,
5568         .rev = 0,
5569         .funcs = &gfx_v12_0_ip_funcs,
5570 };