1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v12_0.h"
34 #include "soc24.h"
35 #include "nvd.h"
36
37 #include "gc/gc_12_0_0_offset.h"
38 #include "gc/gc_12_0_0_sh_mask.h"
39 #include "soc24_enum.h"
40 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
41
42 #include "soc15.h"
43 #include "soc15d.h"
44 #include "clearstate_gfx12.h"
45 #include "v12_structs.h"
46 #include "gfx_v12_0.h"
47 #include "nbif_v6_3_1.h"
48 #include "mes_v12_0.h"
49
50 #define GFX12_NUM_GFX_RINGS     1
51 #define GFX12_MEC_HPD_SIZE      2048
52
53 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
54
55 MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
56 MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
57 MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
58 MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
59 MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
60 MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
61 MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
62 MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
63 MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
64 MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
65
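/*
 * Register lists below are snapshotted by the GC IP-dump path (buffers for
 * them are allocated in gfx_v12_0_alloc_ip_dump() near the end of this
 * listing): general GC/CP status, per-compute-queue HQD state and per-gfx-
 * queue HQD state respectively.
 */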
66 static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
67         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
68         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
69         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
70         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
71         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
72         SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
73         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
74         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
75         SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
76         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
77         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
78         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
79         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
80         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
81         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
82         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
83         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
84         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
85         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
86         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
87         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
88         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
89         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
90         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
91         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
92         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
93         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
94         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
95         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
96         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
97         SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
98         SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
99         SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
100         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
101         SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
102         SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
103         SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
104         SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
105         SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
106         SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
107         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
108         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
109         SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
110         SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
111         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
112         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
113         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
114         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
115         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
116         SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
117         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
118         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
119         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
120
121         /* cp header registers */
122         SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
123         SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
124         SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
125         SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
126         /* SE status registers */
127         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
128         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
129         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
130         SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
131 };
132
133 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
134         /* compute registers */
135         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
136         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
137         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
138         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
139         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
140         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
141         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
142         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
143         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
144         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
145         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
146         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
147         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
148         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
149         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
150         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
151         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
152         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
153         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
154         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
155         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
156         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
157         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
158         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
159         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
160         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
161         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
162         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
163         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
164         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
165         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
166         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
167         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
168         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
169         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
170         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
171         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
172         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
173         SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
174 };
175
176 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
177         /* gfx queue registers */
178         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
179         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
180         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
181         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
182         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
183         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
184         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
185         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
186         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
187         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
188         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
189         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
190         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
191         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
192         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
193         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
194         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
195         SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
196         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
197         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
198         SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
199         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
200         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
201         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
202         SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
203 };
204
205 static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
206         SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
209 };
210
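/*
 * Default SH_MEM_CONFIG value: 64-bit address mode, unaligned access
 * allowed, and INITIAL_INST_PREFETCH set to 3.
 */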
211 #define DEFAULT_SH_MEM_CONFIG \
212         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
213          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
214          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
215
216 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
217 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
218 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
219 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
220 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
221 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
222 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
223                                  struct amdgpu_cu_info *cu_info);
224 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
225 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
226                                    u32 sh_num, u32 instance, int xcc_id);
227 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
228
229 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
230 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
231                                      uint32_t val);
232 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
233 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
234                                            uint16_t pasid, uint32_t flush_type,
235                                            bool all_hub, uint8_t dst_sel);
236 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
237 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
238 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
239                                       bool enable);
240
241 static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
242                                         uint64_t queue_mask)
243 {
244         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
245         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
246                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
247         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
248         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
249         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
250         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
251         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
252         amdgpu_ring_write(kiq_ring, 0);
253 }
254
255 static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
256                                      struct amdgpu_ring *ring)
257 {
258         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
259         uint64_t wptr_addr = ring->wptr_gpu_addr;
260         uint32_t me = 0, eng_sel = 0;
261
262         switch (ring->funcs->type) {
263         case AMDGPU_RING_TYPE_COMPUTE:
264                 me = 1;
265                 eng_sel = 0;
266                 break;
267         case AMDGPU_RING_TYPE_GFX:
268                 me = 0;
269                 eng_sel = 4;
270                 break;
271         case AMDGPU_RING_TYPE_MES:
272                 me = 2;
273                 eng_sel = 5;
274                 break;
275         default:
276                 WARN_ON(1);
277         }
278
279         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
280         /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
281         amdgpu_ring_write(kiq_ring,
282                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
283                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
284                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
285                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
286                           PACKET3_MAP_QUEUES_ME((me)) |
287                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
288                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
289                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
290                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
291         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
292         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
293         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
294         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
295         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
296 }
297
298 static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
299                                        struct amdgpu_ring *ring,
300                                        enum amdgpu_unmap_queues_action action,
301                                        u64 gpu_addr, u64 seq)
302 {
303         struct amdgpu_device *adev = kiq_ring->adev;
304         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
305
306         if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
307                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
308                 return;
309         }
310
311         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
312         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
313                           PACKET3_UNMAP_QUEUES_ACTION(action) |
314                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
315                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
316                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
317         amdgpu_ring_write(kiq_ring,
318                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
319
320         if (action == PREEMPT_QUEUES_NO_UNMAP) {
321                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
322                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
323                 amdgpu_ring_write(kiq_ring, seq);
324         } else {
325                 amdgpu_ring_write(kiq_ring, 0);
326                 amdgpu_ring_write(kiq_ring, 0);
327                 amdgpu_ring_write(kiq_ring, 0);
328         }
329 }
330
331 static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
332                                        struct amdgpu_ring *ring,
333                                        u64 addr, u64 seq)
334 {
335         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
336
337         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
338         amdgpu_ring_write(kiq_ring,
339                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
340                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
341                           PACKET3_QUERY_STATUS_COMMAND(2));
342         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
343                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
344                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
345         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
346         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
347         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
348         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
349 }
350
351 static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
352                                           uint16_t pasid,
353                                           uint32_t flush_type,
354                                           bool all_hub)
355 {
356         gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
357 }
358
359 static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
360         .kiq_set_resources = gfx_v12_0_kiq_set_resources,
361         .kiq_map_queues = gfx_v12_0_kiq_map_queues,
362         .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
363         .kiq_query_status = gfx_v12_0_kiq_query_status,
364         .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
365         .set_resources_size = 8,
366         .map_queues_size = 7,
367         .unmap_queues_size = 6,
368         .query_status_size = 7,
369         .invalidate_tlbs_size = 2,
370 };
371
372 static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
373 {
374         adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
375 }
376
377 static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
378                                    int mem_space, int opt, uint32_t addr0,
379                                    uint32_t addr1, uint32_t ref,
380                                    uint32_t mask, uint32_t inv)
381 {
382         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
383         amdgpu_ring_write(ring,
384                           /* memory (1) or register (0) */
385                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
386                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
387                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
388                            WAIT_REG_MEM_ENGINE(eng_sel)));
389
390         if (mem_space)
391                 BUG_ON(addr0 & 0x3); /* Dword align */
392         amdgpu_ring_write(ring, addr0);
393         amdgpu_ring_write(ring, addr1);
394         amdgpu_ring_write(ring, ref);
395         amdgpu_ring_write(ring, mask);
396         amdgpu_ring_write(ring, inv); /* poll interval */
397 }
398
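/*
 * Basic ring sanity test: seed SCRATCH_REG0 with 0xCAFEDEAD, ask the CP to
 * rewrite it to 0xDEADBEEF through the ring (a register write on the KIQ,
 * a SET_UCONFIG_REG packet otherwise), then poll the scratch register until
 * the new value shows up or the timeout expires.
 */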
399 static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
400 {
401         struct amdgpu_device *adev = ring->adev;
402         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
403         uint32_t tmp = 0;
404         unsigned i;
405         int r;
406
407         WREG32(scratch, 0xCAFEDEAD);
408         r = amdgpu_ring_alloc(ring, 5);
409         if (r) {
410                 dev_err(adev->dev,
411                         "amdgpu: cp failed to lock ring %d (%d).\n",
412                         ring->idx, r);
413                 return r;
414         }
415
416         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
417                 gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
418         } else {
419                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
420                 amdgpu_ring_write(ring, scratch -
421                                   PACKET3_SET_UCONFIG_REG_START);
422                 amdgpu_ring_write(ring, 0xDEADBEEF);
423         }
424         amdgpu_ring_commit(ring);
425
426         for (i = 0; i < adev->usec_timeout; i++) {
427                 tmp = RREG32(scratch);
428                 if (tmp == 0xDEADBEEF)
429                         break;
430                 if (amdgpu_emu_mode == 1)
431                         msleep(1);
432                 else
433                         udelay(1);
434         }
435
436         if (i >= adev->usec_timeout)
437                 r = -ETIMEDOUT;
438         return r;
439 }
440
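/*
 * IB sanity test: submit a small indirect buffer containing a WRITE_DATA
 * packet that stores 0xDEADBEEF to a writeback slot (or MES context memory
 * for MES queues), wait on the fence, and check that the value landed.
 */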
441 static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443         struct amdgpu_device *adev = ring->adev;
444         struct amdgpu_ib ib;
445         struct dma_fence *f = NULL;
446         unsigned index;
447         uint64_t gpu_addr;
448         volatile uint32_t *cpu_ptr;
449         long r;
450
451         /* MES KIQ fw doesn't support indirect buffers for now */
452         if (adev->enable_mes_kiq &&
453             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
454                 return 0;
455
456         memset(&ib, 0, sizeof(ib));
457
458         if (ring->is_mes_queue) {
459                 uint32_t padding, offset;
460
461                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
462                 padding = amdgpu_mes_ctx_get_offs(ring,
463                                                   AMDGPU_MES_CTX_PADDING_OFFS);
464
465                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
466                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
467
468                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
469                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
470                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
471         } else {
472                 r = amdgpu_device_wb_get(adev, &index);
473                 if (r)
474                         return r;
475
476                 gpu_addr = adev->wb.gpu_addr + (index * 4);
477                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
478                 cpu_ptr = &adev->wb.wb[index];
479
480                 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
481                 if (r) {
482                         dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
483                         goto err1;
484                 }
485         }
486
487         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
488         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
489         ib.ptr[2] = lower_32_bits(gpu_addr);
490         ib.ptr[3] = upper_32_bits(gpu_addr);
491         ib.ptr[4] = 0xDEADBEEF;
492         ib.length_dw = 5;
493
494         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
495         if (r)
496                 goto err2;
497
498         r = dma_fence_wait_timeout(f, false, timeout);
499         if (r == 0) {
500                 r = -ETIMEDOUT;
501                 goto err2;
502         } else if (r < 0) {
503                 goto err2;
504         }
505
506         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
507                 r = 0;
508         else
509                 r = -EINVAL;
510 err2:
511         if (!ring->is_mes_queue)
512                 amdgpu_ib_free(adev, &ib, NULL);
513         dma_fence_put(f);
514 err1:
515         if (!ring->is_mes_queue)
516                 amdgpu_device_wb_free(adev, index);
517         return r;
518 }
519
520 static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
521 {
522         amdgpu_ucode_release(&adev->gfx.pfp_fw);
523         amdgpu_ucode_release(&adev->gfx.me_fw);
524         amdgpu_ucode_release(&adev->gfx.rlc_fw);
525         amdgpu_ucode_release(&adev->gfx.mec_fw);
526
527         kfree(adev->gfx.rlc.register_list_format);
528 }
529
530 static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
531 {
532         const struct psp_firmware_header_v1_0 *toc_hdr;
533         int err = 0;
534
535         err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
536                                    "amdgpu/%s_toc.bin", ucode_prefix);
537         if (err)
538                 goto out;
539
540         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
541         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
542         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
543         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
544         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
545                         le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
546         return 0;
547 out:
548         amdgpu_ucode_release(&adev->psp.toc_fw);
549         return err;
550 }
551
552 static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
553 {
554         char ucode_prefix[15];
555         int err;
556         const struct rlc_firmware_header_v2_0 *rlc_hdr;
557         uint16_t version_major;
558         uint16_t version_minor;
559
560         DRM_DEBUG("\n");
561
562         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
563
564         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
565                                    "amdgpu/%s_pfp.bin", ucode_prefix);
566         if (err)
567                 goto out;
568         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
569         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
570
571         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
572                                    "amdgpu/%s_me.bin", ucode_prefix);
573         if (err)
574                 goto out;
575         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
576         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
577
578         if (!amdgpu_sriov_vf(adev)) {
579                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
580                                            "amdgpu/%s_rlc.bin", ucode_prefix);
581                 if (err)
582                         goto out;
583                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
584                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
585                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
586                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
587                 if (err)
588                         goto out;
589         }
590
591         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
592                                    "amdgpu/%s_mec.bin", ucode_prefix);
593         if (err)
594                 goto out;
595         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
596         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
597         amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
598
599         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
600                 err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
601
602         /* only one MEC for gfx 12 */
603         adev->gfx.mec2_fw = NULL;
604
605         if (adev->gfx.imu.funcs) {
606                 if (adev->gfx.imu.funcs->init_microcode) {
607                         err = adev->gfx.imu.funcs->init_microcode(adev);
608                         if (err)
609                                 dev_err(adev->dev, "Failed to load imu firmware!\n");
610                 }
611         }
612
613 out:
614         if (err) {
615                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
616                 amdgpu_ucode_release(&adev->gfx.me_fw);
617                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
618                 amdgpu_ucode_release(&adev->gfx.mec_fw);
619         }
620
621         return err;
622 }
623
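/*
 * Size (in dwords) of the clear-state buffer: one dword is reserved for the
 * cluster count that gfx_v12_0_get_csb_buffer() writes at buffer[0], plus,
 * for each SECT_CONTEXT extent, two header dwords (reg_count, reg_index)
 * followed by the register values themselves.
 */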
624 static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
625 {
626         u32 count = 0;
627         const struct cs_section_def *sect = NULL;
628         const struct cs_extent_def *ext = NULL;
629
630         count += 1;
631
632         for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
633                 if (sect->id == SECT_CONTEXT) {
634                         for (ext = sect->section; ext->extent != NULL; ++ext)
635                                 count += 2 + ext->reg_count;
636                 } else
637                         return 0;
638         }
639
640         return count;
641 }
642
643 static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
644                                      volatile u32 *buffer)
645 {
646         u32 count = 0, clustercount = 0, i;
647         const struct cs_section_def *sect = NULL;
648         const struct cs_extent_def *ext = NULL;
649
650         if (adev->gfx.rlc.cs_data == NULL)
651                 return;
652         if (buffer == NULL)
653                 return;
654
655         count += 1;
656
657         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
658                 if (sect->id == SECT_CONTEXT) {
659                         for (ext = sect->section; ext->extent != NULL; ++ext) {
660                                 clustercount++;
661                                 buffer[count++] = ext->reg_count;
662                                 buffer[count++] = ext->reg_index;
663
664                                 for (i = 0; i < ext->reg_count; i++)
665                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
666                         }
667                 } else
668                         return;
669         }
670
671         buffer[0] = clustercount;
672 }
673
674 static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
675 {
676         /* clear state block */
677         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
678                         &adev->gfx.rlc.clear_state_gpu_addr,
679                         (void **)&adev->gfx.rlc.cs_ptr);
680
681         /* jump table block */
682         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
683                         &adev->gfx.rlc.cp_table_gpu_addr,
684                         (void **)&adev->gfx.rlc.cp_table_ptr);
685 }
686
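/*
 * Cache the register offsets used by the RLC-assisted (RLCG) register access
 * helpers (scratch registers, GRBM_GFX_CNTL/INDEX and RLC_SPARE_INT_0) and
 * mark RLCG register access as supported.
 */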
687 static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
688 {
689         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
690
691         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
692         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
693         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
694         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
695         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
696         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
697         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
698         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
699         adev->gfx.rlc.rlcg_reg_access_supported = true;
700 }
701
702 static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
703 {
704         const struct cs_section_def *cs_data;
705         int r;
706
707         adev->gfx.rlc.cs_data = gfx12_cs_data;
708
709         cs_data = adev->gfx.rlc.cs_data;
710
711         if (cs_data) {
712                 /* init clear state block */
713                 r = amdgpu_gfx_rlc_init_csb(adev);
714                 if (r)
715                         return r;
716         }
717
718         /* init spm vmid with 0xf */
719         if (adev->gfx.rlc.funcs->update_spm_vmid)
720                 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
721
722         return 0;
723 }
724
725 static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
726 {
727         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
728         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
729         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
730 }
731
732 static void gfx_v12_0_me_init(struct amdgpu_device *adev)
733 {
734         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
735
736         amdgpu_gfx_graphics_queue_acquire(adev);
737 }
738
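/*
 * Allocate the MEC HPD EOP buffer in GTT: one GFX12_MEC_HPD_SIZE slot per
 * enabled compute ring, zero-initialized before use.
 */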
739 static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
740 {
741         int r;
742         u32 *hpd;
743         size_t mec_hpd_size;
744
745         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
746
747         /* take ownership of the relevant compute queues */
748         amdgpu_gfx_compute_queue_acquire(adev);
749         mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
750
751         if (mec_hpd_size) {
752                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
753                                               AMDGPU_GEM_DOMAIN_GTT,
754                                               &adev->gfx.mec.hpd_eop_obj,
755                                               &adev->gfx.mec.hpd_eop_gpu_addr,
756                                               (void **)&hpd);
757                 if (r) {
758                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
759                         gfx_v12_0_mec_fini(adev);
760                         return r;
761                 }
762
763                 memset(hpd, 0, mec_hpd_size);
764
765                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
766                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
767         }
768
769         return 0;
770 }
771
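/*
 * Wave state is read through the SQ_IND_INDEX / SQ_IND_DATA indirect
 * register pair; wave_read_regs() additionally sets AUTO_INCR so that
 * consecutive reads of SQ_IND_DATA return consecutive registers.
 */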
772 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
773 {
774         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
775                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
776                 (address << SQ_IND_INDEX__INDEX__SHIFT));
777         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
778 }
779
780 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
781                            uint32_t thread, uint32_t regno,
782                            uint32_t num, uint32_t *out)
783 {
784         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
785                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
786                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
787                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
788                 (SQ_IND_INDEX__AUTO_INCR_MASK));
789         while (num--)
790                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
791 }
792
793 static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
794                                      uint32_t xcc_id,
795                                      uint32_t simd, uint32_t wave,
796                                      uint32_t *dst, int *no_fields)
797 {
798         /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
799          * field when performing a select_se_sh so it should be
800          * zero here */
801         WARN_ON(simd != 0);
802
803         /* type 4 wave data */
804         dst[(*no_fields)++] = 4;
805         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
806         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
807         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
808         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
809         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
810         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
811         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
812         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
813         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
814         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
815         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
816         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
817         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
818         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
819         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
820         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
821         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
822         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
823         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
824         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
825         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
826         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
827         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
828 }
829
830 static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
831                                       uint32_t xcc_id, uint32_t simd,
832                                       uint32_t wave, uint32_t start,
833                                       uint32_t size, uint32_t *dst)
834 {
835         WARN_ON(simd != 0);
836
837         wave_read_regs(
838                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
839                 dst);
840 }
841
842 static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
843                                       uint32_t xcc_id, uint32_t simd,
844                                       uint32_t wave, uint32_t thread,
845                                       uint32_t start, uint32_t size,
846                                       uint32_t *dst)
847 {
848         wave_read_regs(
849                 adev, wave, thread,
850                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
851 }
852
853 static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
854                                        u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
855 {
856         soc24_grbm_select(adev, me, pipe, q, vm);
857 }
858
859 static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
860         .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
861         .select_se_sh = &gfx_v12_0_select_se_sh,
862         .read_wave_data = &gfx_v12_0_read_wave_data,
863         .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
864         .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
865         .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
866         .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
867 };
868
869 static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
870 {
871
872         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
873         case IP_VERSION(12, 0, 0):
874         case IP_VERSION(12, 0, 1):
875                 adev->gfx.config.max_hw_contexts = 8;
876                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
877                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
878                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
879                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
880                 break;
881         default:
882                 BUG();
883                 break;
884         }
885
886         return 0;
887 }
888
889 static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
890                                    int me, int pipe, int queue)
891 {
892         int r;
893         struct amdgpu_ring *ring;
894         unsigned int irq_type;
895
896         ring = &adev->gfx.gfx_ring[ring_id];
897
898         ring->me = me;
899         ring->pipe = pipe;
900         ring->queue = queue;
901
902         ring->ring_obj = NULL;
903         ring->use_doorbell = true;
904
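        /* doorbell_index values are kept in 64-bit (qword) units; the ring
         * uses a dword-based offset, hence the << 1 below */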
905         if (!ring_id)
906                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
907         else
908                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
909         ring->vm_hub = AMDGPU_GFXHUB(0);
910         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
911
912         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
913         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
914                              AMDGPU_RING_PRIO_DEFAULT, NULL);
915         if (r)
916                 return r;
917         return 0;
918 }
919
920 static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
921                                        int mec, int pipe, int queue)
922 {
923         int r;
924         unsigned irq_type;
925         struct amdgpu_ring *ring;
926         unsigned int hw_prio;
927
928         ring = &adev->gfx.compute_ring[ring_id];
929
930         /* mec0 is me1 */
931         ring->me = mec + 1;
932         ring->pipe = pipe;
933         ring->queue = queue;
934
935         ring->ring_obj = NULL;
936         ring->use_doorbell = true;
937         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
938         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
939                                 + (ring_id * GFX12_MEC_HPD_SIZE);
940         ring->vm_hub = AMDGPU_GFXHUB(0);
941         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
942
943         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
944                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
945                 + ring->pipe;
946         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
947                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
948         /* type-2 packets are deprecated on MEC, use type-3 instead */
949         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
950                              hw_prio, NULL);
951         if (r)
952                 return r;
953
954         return 0;
955 }
956
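/*
 * Offset and size of each firmware image inside the RLC autoload buffer,
 * indexed by SOC24 firmware id and filled from the PSP TOC by
 * gfx_v12_0_parse_rlc_toc().
 */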
957 static struct {
958         SOC24_FIRMWARE_ID       id;
959         unsigned int            offset;
960         unsigned int            size;
961         unsigned int            size_x16;
962 } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
963
964 #define RLC_TOC_OFFSET_DWUNIT   8
965 #define RLC_SIZE_MULTIPLE       1024
966 #define RLC_TOC_UMF_SIZE_inM    23ULL
967 #define RLC_TOC_FORMAT_API      165ULL
968
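/*
 * Walk the TOC entries and convert each offset (stored in 8-dword units) and
 * size (in dwords, scaled by 1024 when size_x16 is set) into byte values.
 */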
969 static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
970 {
971         RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
972
973         while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
974                 rlc_autoload_info[ucode->id].id = ucode->id;
975                 rlc_autoload_info[ucode->id].offset =
976                         ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
977                 rlc_autoload_info[ucode->id].size =
978                         ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
979                                           ucode->size * 4;
980                 ucode++;
981         }
982 }
983
984 static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
985 {
986         uint32_t total_size = 0;
987         SOC24_FIRMWARE_ID id;
988
989         gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
990
991         for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
992                 total_size += rlc_autoload_info[id].size;
993
994         /* offsets in the rlc toc may be aligned upward; make sure the
995          * buffer also covers the last entry */
995         if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
996                 total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
997                         rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
998         if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
999                 total_size = RLC_TOC_UMF_SIZE_inM << 20;
1000
1001         return total_size;
1002 }
1003
1004 static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1005 {
1006         int r;
1007         uint32_t total_size;
1008
1009         total_size = gfx_v12_0_calc_toc_total_size(adev);
1010
1011         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1012                                       AMDGPU_GEM_DOMAIN_VRAM,
1013                                       &adev->gfx.rlc.rlc_autoload_bo,
1014                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1015                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1016
1017         if (r) {
1018                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1019                 return r;
1020         }
1021
1022         return 0;
1023 }
1024
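/*
 * Copy one firmware image into its slot in the autoload buffer, clamping the
 * copy to the size reserved in the TOC and zero-filling any remainder.
 */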
1025 static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1026                                                        SOC24_FIRMWARE_ID id,
1027                                                        const void *fw_data,
1028                                                        uint32_t fw_size)
1029 {
1030         uint32_t toc_offset;
1031         uint32_t toc_fw_size;
1032         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1033
1034         if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
1035                 return;
1036
1037         toc_offset = rlc_autoload_info[id].offset;
1038         toc_fw_size = rlc_autoload_info[id].size;
1039
1040         if (fw_size == 0)
1041                 fw_size = toc_fw_size;
1042
1043         if (fw_size > toc_fw_size)
1044                 fw_size = toc_fw_size;
1045
1046         memcpy(ptr + toc_offset, fw_data, fw_size);
1047
1048         if (fw_size < toc_fw_size)
1049                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1050 }
1051
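/*
 * Stamp the RLC_TOC_FORMAT_API marker into the second-to-last dword of the
 * TOC image, then copy the whole TOC into its autoload slot.
 */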
1052 static void
1053 gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
1054 {
1055         void *data;
1056         uint32_t size;
1057         uint32_t *toc_ptr;
1058
1059         data = adev->psp.toc.start_addr;
1060         size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
1061
1062         toc_ptr = (uint32_t *)data + size / 4 - 2;
1063         *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
1064
1065         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
1066                                                    data, size);
1067 }
1068
1069 static void
1070 gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
1071 {
1072         const __le32 *fw_data;
1073         uint32_t fw_size;
1074         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1075         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1076         const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
1077         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1078         uint16_t version_major, version_minor;
1079
1080         /* pfp ucode */
1081         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1082                 adev->gfx.pfp_fw->data;
1083         /* instruction */
1084         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1085                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1086         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1087         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
1088                                                    fw_data, fw_size);
1089         /* data */
1090         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1091                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1092         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1093         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
1094                                                    fw_data, fw_size);
1095         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
1096                                                    fw_data, fw_size);
1097         /* me ucode */
1098         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1099                 adev->gfx.me_fw->data;
1100         /* instruction */
1101         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1102                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1103         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1104         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
1105                                                    fw_data, fw_size);
1106         /* data */
1107         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1108                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1109         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1110         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
1111                                                    fw_data, fw_size);
1112         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
1113                                                    fw_data, fw_size);
1114         /* mec ucode */
1115         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1116                 adev->gfx.mec_fw->data;
1117         /* instruction */
1118         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1119                 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1120         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1121         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
1122                                                    fw_data, fw_size);
1123         /* data */
1124         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1125                 le32_to_cpu(cpv2_hdr->data_offset_bytes));
1126         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1127         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
1128                                                    fw_data, fw_size);
1129         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
1130                                                    fw_data, fw_size);
1131         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
1132                                                    fw_data, fw_size);
1133         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
1134                                                    fw_data, fw_size);
1135
1136         /* rlc ucode */
1137         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1138                 adev->gfx.rlc_fw->data;
1139         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1140                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1141         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1142         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
1143                                                    fw_data, fw_size);
1144
1145         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1146         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1147         if (version_major == 2) {
1148                 if (version_minor >= 1) {
1149                         rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1150
1151                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1152                                         le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
1153                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
1154                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
1155                                                    fw_data, fw_size);
1156
1157                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1158                                         le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
1159                         fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
1160                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
1161                                                    fw_data, fw_size);
1162                 }
1163                 if (version_minor >= 2) {
1164                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1165
1166                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1167                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1168                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1169                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
1170                                                    fw_data, fw_size);
1171
1172                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1173                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1174                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1175                         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
1176                                                    fw_data, fw_size);
1177                 }
1178         }
1179 }
1180
1181 static void
1182 gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
1183 {
1184         const __le32 *fw_data;
1185         uint32_t fw_size;
1186         const struct sdma_firmware_header_v3_0 *sdma_hdr;
1187
1188         sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
1189                 adev->sdma.instance[0].fw->data;
1190         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1191                         le32_to_cpu(sdma_hdr->ucode_offset_bytes));
1192         fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
1193
1194         gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
1195                                                    fw_data, fw_size);
1196 }
1197
1198 static void
1199 gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
1200 {
1201         const __le32 *fw_data;
1202         unsigned fw_size;
1203         const struct mes_firmware_header_v1_0 *mes_hdr;
1204         int pipe, ucode_id, data_id;
1205
1206         for (pipe = 0; pipe < 2; pipe++) {
1207                 if (pipe == 0) {
1208                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
1209                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
1210                 } else {
1211                         ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
1212                         data_id  = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
1213                 }
1214
1215                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1216                         adev->mes.fw[pipe]->data;
1217
1218                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1219                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1220                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1221
1222                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
1223
1224                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1225                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1226                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1227
1228                 gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
1229         }
1230 }
1231
1232 static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1233 {
1234         uint32_t rlc_g_offset, rlc_g_size;
1235         uint64_t gpu_addr;
1236         uint32_t data;
1237
1238         /* RLC autoload sequence 2: copy ucode */
1239         gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
1240         gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
1241         gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
1242         gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
1243
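        /* point the IMU's RLC bootloader registers at the RLC_G image inside the autoload buffer */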
1244         rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
1245         rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
1246         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
1247
1248         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1249         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1250
1251         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1252
1253         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
1254                 /* RLC autoload sequence 3: load IMU fw */
1255                 if (adev->gfx.imu.funcs->load_microcode)
1256                         adev->gfx.imu.funcs->load_microcode(adev);
1257                 /* RLC autoload sequence 4: init IMU fw */
1258                 if (adev->gfx.imu.funcs->setup_imu)
1259                         adev->gfx.imu.funcs->setup_imu(adev);
1260                 if (adev->gfx.imu.funcs->start_imu)
1261                         adev->gfx.imu.funcs->start_imu(adev);
1262
1263                 /* RLC autoload sequence 5: disable gpa mode */
1264                 gfx_v12_0_disable_gpa_mode(adev);
1265         } else {
1266                 /* unhalt rlc to start autoload without imu */
1267                 data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1268                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
1269                 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1270                 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
1271                 WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
1272         }
1273
1274         return 0;
1275 }
1276
1277 static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
1278 {
1279         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
1280         uint32_t *ptr;
1281         uint32_t inst;
1282
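        /* Allocate memory for core gfx registers for IP dump */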
1283         ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1284         if (ptr == NULL) {
1285                 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1286                 adev->gfx.ip_dump_core = NULL;
1287         } else {
1288                 adev->gfx.ip_dump_core = ptr;
1289         }
1290
1291         /* Allocate memory for compute queue registers for all the instances */
1292         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
1293         inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1294                 adev->gfx.mec.num_queue_per_pipe;
1295
1296         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1297         if (ptr == NULL) {
1298                 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1299                 adev->gfx.ip_dump_compute_queues = NULL;
1300         } else {
1301                 adev->gfx.ip_dump_compute_queues = ptr;
1302         }
1303
1304         /* Allocate memory for gfx queue registers for all the instances */
1305         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
1306         inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1307                 adev->gfx.me.num_queue_per_pipe;
1308
1309         ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1310         if (ptr == NULL) {
1311                 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1312                 adev->gfx.ip_dump_gfx_queues = NULL;
1313         } else {
1314                 adev->gfx.ip_dump_gfx_queues = ptr;
1315         }
1316 }
1317
1318 static int gfx_v12_0_sw_init(void *handle)
1319 {
1320         int i, j, k, r, ring_id = 0;
1321         unsigned num_compute_rings;
1322         int xcc_id = 0;
1323         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1324
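        /* set the gfx/compute pipe and queue topology per GC IP version */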
1325         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1326         case IP_VERSION(12, 0, 0):
1327         case IP_VERSION(12, 0, 1):
1328                 adev->gfx.me.num_me = 1;
1329                 adev->gfx.me.num_pipe_per_me = 1;
1330                 adev->gfx.me.num_queue_per_pipe = 1;
1331                 adev->gfx.mec.num_mec = 2;
1332                 adev->gfx.mec.num_pipe_per_mec = 2;
1333                 adev->gfx.mec.num_queue_per_pipe = 4;
1334                 break;
1335         default:
1336                 adev->gfx.me.num_me = 1;
1337                 adev->gfx.me.num_pipe_per_me = 1;
1338                 adev->gfx.me.num_queue_per_pipe = 1;
1339                 adev->gfx.mec.num_mec = 1;
1340                 adev->gfx.mec.num_pipe_per_mec = 4;
1341                 adev->gfx.mec.num_queue_per_pipe = 8;
1342                 break;
1343         }
1344
1345         /* recalculate compute rings to use based on hardware configuration */
1346         num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
1347                              adev->gfx.mec.num_queue_per_pipe) / 2;
1348         adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
1349                                           num_compute_rings);
1350
1351         /* EOP Event */
1352         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1353                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1354                               &adev->gfx.eop_irq);
1355         if (r)
1356                 return r;
1357
1358         /* Privileged reg */
1359         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1360                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1361                               &adev->gfx.priv_reg_irq);
1362         if (r)
1363                 return r;
1364
1365         /* Privileged inst */
1366         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1367                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1368                               &adev->gfx.priv_inst_irq);
1369         if (r)
1370                 return r;
1371
1372         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1373
1374         gfx_v12_0_me_init(adev);
1375
1376         r = gfx_v12_0_rlc_init(adev);
1377         if (r) {
1378                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
1379                 return r;
1380         }
1381
1382         r = gfx_v12_0_mec_init(adev);
1383         if (r) {
1384                 dev_err(adev->dev, "Failed to init MEC BOs!\n");
1385                 return r;
1386         }
1387
1388         /* set up the gfx ring */
1389         for (i = 0; i < adev->gfx.me.num_me; i++) {
1390                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1391                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1392                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1393                                         continue;
1394
1395                                 r = gfx_v12_0_gfx_ring_init(adev, ring_id,
1396                                                             i, k, j);
1397                                 if (r)
1398                                         return r;
1399                                 ring_id++;
1400                         }
1401                 }
1402         }
1403
1404         ring_id = 0;
1405         /* set up the compute queues - allocate horizontally across pipes */
1406         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1407                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1408                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1409                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev,
1410                                                                 0, i, k, j))
1411                                         continue;
1412
1413                                 r = gfx_v12_0_compute_ring_init(adev, ring_id,
1414                                                                 i, k, j);
1415                                 if (r)
1416                                         return r;
1417
1418                                 ring_id++;
1419                         }
1420                 }
1421         }
1422
1423         if (!adev->enable_mes_kiq) {
1424                 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
1425                 if (r) {
1426                         dev_err(adev->dev, "Failed to init KIQ BOs!\n");
1427                         return r;
1428                 }
1429
1430                 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1431                 if (r)
1432                         return r;
1433         }
1434
1435         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
1436         if (r)
1437                 return r;
1438
1439         /* allocate visible FB for rlc auto-loading fw */
1440         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1441                 r = gfx_v12_0_rlc_autoload_buffer_init(adev);
1442                 if (r)
1443                         return r;
1444         }
1445
1446         r = gfx_v12_0_gpu_early_init(adev);
1447         if (r)
1448                 return r;
1449
1450         gfx_v12_0_alloc_ip_dump(adev);
1451
1452         return 0;
1453 }
1454
1455 static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
1456 {
1457         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1458                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1459                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1460
1461         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1462                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1463                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1464 }
1465
1466 static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
1467 {
1468         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1469                               &adev->gfx.me.me_fw_gpu_addr,
1470                               (void **)&adev->gfx.me.me_fw_ptr);
1471
1472         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1473                                &adev->gfx.me.me_fw_data_gpu_addr,
1474                                (void **)&adev->gfx.me.me_fw_data_ptr);
1475 }
1476
1477 static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1478 {
1479         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1480                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1481                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1482 }
1483
1484 static int gfx_v12_0_sw_fini(void *handle)
1485 {
1486         int i;
1487         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1488
1489         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1490                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1491         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1492                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1493
1494         amdgpu_gfx_mqd_sw_fini(adev, 0);
1495
1496         if (!adev->enable_mes_kiq) {
1497                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1498                 amdgpu_gfx_kiq_fini(adev, 0);
1499         }
1500
1501         gfx_v12_0_pfp_fini(adev);
1502         gfx_v12_0_me_fini(adev);
1503         gfx_v12_0_rlc_fini(adev);
1504         gfx_v12_0_mec_fini(adev);
1505
1506         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1507                 gfx_v12_0_rlc_autoload_buffer_fini(adev);
1508
1509         gfx_v12_0_free_microcode(adev);
1510
1511         kfree(adev->gfx.ip_dump_core);
1512         kfree(adev->gfx.ip_dump_compute_queues);
1513         kfree(adev->gfx.ip_dump_gfx_queues);
1514
1515         return 0;
1516 }
1517
1518 static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1519                                    u32 sh_num, u32 instance, int xcc_id)
1520 {
1521         u32 data;
1522
1523         if (instance == 0xffffffff)
1524                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1525                                      INSTANCE_BROADCAST_WRITES, 1);
1526         else
1527                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1528                                      instance);
1529
1530         if (se_num == 0xffffffff)
1531                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1532                                      1);
1533         else
1534                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1535
1536         if (sh_num == 0xffffffff)
1537                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1538                                      1);
1539         else
1540                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1541
1542         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1543 }
1544
1545 static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1546 {
1547         u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1548
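        /* active SAs = all possible SAs minus the fused-off and user-disabled ones */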
1549         gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
1550         gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1551                                             GRBM_CC_GC_SA_UNIT_DISABLE,
1552                                             SA_DISABLE);
1553         gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
1554         gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1555                                                  GRBM_GC_USER_SA_UNIT_DISABLE,
1556                                                  SA_DISABLE);
1557         sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1558                                             adev->gfx.config.max_shader_engines);
1559
1560         return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1561 }
1562
1563 static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1564 {
1565         u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1566         u32 rb_mask;
1567
1568         gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1569         gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1570                                             CC_RB_BACKEND_DISABLE,
1571                                             BACKEND_DISABLE);
1572         gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1573         gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1574                                                  GC_USER_RB_BACKEND_DISABLE,
1575                                                  BACKEND_DISABLE);
1576         rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1577                                             adev->gfx.config.max_shader_engines);
1578
1579         return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1580 }
1581
1582 static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
1583 {
1584         u32 rb_bitmap_width_per_sa;
1585         u32 max_sa;
1586         u32 active_sa_bitmap;
1587         u32 global_active_rb_bitmap;
1588         u32 active_rb_bitmap = 0;
1589         u32 i;
1590
1591         /* query sa bitmap from SA_UNIT_DISABLE registers */
1592         active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
1593         /* query rb bitmap from RB_BACKEND_DISABLE registers */
1594         global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
1595
1596         /* generate active rb bitmap according to active sa bitmap */
1597         max_sa = adev->gfx.config.max_shader_engines *
1598                  adev->gfx.config.max_sh_per_se;
1599         rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1600                                  adev->gfx.config.max_sh_per_se;
1601         for (i = 0; i < max_sa; i++) {
1602                 if (active_sa_bitmap & (1 << i))
1603                         active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1604         }
1605
1606         active_rb_bitmap |= global_active_rb_bitmap;
1607         adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1608         adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1609 }
1610
1611 #define LDS_APP_BASE           0x1
1612 #define SCRATCH_APP_BASE       0x2
1613
1614 static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
1615 {
1616         int i;
1617         uint32_t sh_mem_bases;
1618         uint32_t data;
1619
1620         /*
1621          * Configure apertures:
1622          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1623          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1624          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1625          */
1626         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1627                         SCRATCH_APP_BASE;
1628
1629         mutex_lock(&adev->srbm_mutex);
1630         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1631                 soc24_grbm_select(adev, 0, 0, 0, i);
1632                 /* CP and shaders */
1633                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1634                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1635
1636                 /* Enable trap for each kfd vmid. */
1637                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1638                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1639                 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1640         }
1641         soc24_grbm_select(adev, 0, 0, 0, 0);
1642         mutex_unlock(&adev->srbm_mutex);
1643 }
1644
1645 static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
1646 {
1647         /* TODO: harvest feature to be added later. */
1648 }
1649
1650 static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
1651 {
1652 }
1653
1654 static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
1655 {
1656         u32 tmp;
1657         int i;
1658
1659         if (!amdgpu_sriov_vf(adev))
1660                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1661
1662         gfx_v12_0_setup_rb(adev);
1663         gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
1664         gfx_v12_0_get_tcc_info(adev);
1665         adev->gfx.config.pa_sc_tile_steering_override = 0;
1666
1667         /* XXX SH_MEM regs */
1668         /* where to put LDS, scratch, GPUVM in FSA64 space */
1669         mutex_lock(&adev->srbm_mutex);
1670         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1671                 soc24_grbm_select(adev, 0, 0, 0, i);
1672                 /* CP and shaders */
1673                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1674                 if (i != 0) {
1675                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1676                                 (adev->gmc.private_aperture_start >> 48));
1677                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1678                                 (adev->gmc.shared_aperture_start >> 48));
1679                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1680                 }
1681         }
1682         soc24_grbm_select(adev, 0, 0, 0, 0);
1683
1684         mutex_unlock(&adev->srbm_mutex);
1685
1686         gfx_v12_0_init_compute_vmid(adev);
1687 }
1688
1689 static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1690                                                 bool enable)
1691 {
1692         u32 tmp;
1693
1694         if (amdgpu_sriov_vf(adev))
1695                 return;
1696
1697         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1698
1699         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1700                             enable ? 1 : 0);
1701         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1702                             enable ? 1 : 0);
1703         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1704                             enable ? 1 : 0);
1705         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1706                             enable ? 1 : 0);
1707
1708         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1709 }
1710
1711 static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
1712 {
1713         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1714
1715         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1716                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1717         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1718                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1719         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1720
1721         return 0;
1722 }
1723
1724 static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
1725 {
1726         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1727
1728         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1729         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1730 }
1731
1732 static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
1733 {
1734         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1735         udelay(50);
1736         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1737         udelay(50);
1738 }
1739
1740 static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1741                                              bool enable)
1742 {
1743         uint32_t rlc_pg_cntl;
1744
1745         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1746
1747         if (!enable) {
1748                 /* RLC_PG_CNTL[23] = 0 (default)
1749                  * RLC will wait for handshake acks with SMU
1750                  * GFXOFF will be enabled
1751                  * RLC_PG_CNTL[23] = 1
1752                  * RLC will not issue any message to SMU
1753                  * hence no handshake between SMU & RLC
1754                  * GFXOFF will be disabled
1755                  */
1756                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1757         } else {
1758                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
        }
1759         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1760 }
1761
1762 static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
1763 {
1764         /* TODO: enable the rlc & smu handshake once smu
1765          * and the gfxoff feature work as expected */
1766         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1767                 gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
1768
1769         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1770         udelay(50);
1771 }
1772
1773 static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
1774 {
1775         uint32_t tmp;
1776
1777         /* enable Save Restore Machine */
1778         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1779         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1780         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1781         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1782 }
1783
1784 static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
1785 {
1786         const struct rlc_firmware_header_v2_0 *hdr;
1787         const __le32 *fw_data;
1788         unsigned i, fw_size;
1789
1790         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1791         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1792                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1793         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1794
1795         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1796                      RLCG_UCODE_LOADING_START_ADDRESS);
1797
1798         for (i = 0; i < fw_size; i++)
1799                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1800                              le32_to_cpup(fw_data++));
1801
1802         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1803 }
1804
1805 static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1806 {
1807         const struct rlc_firmware_header_v2_2 *hdr;
1808         const __le32 *fw_data;
1809         unsigned i, fw_size;
1810         u32 tmp;
1811
1812         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1813
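        /* load the RLC LX6 IRAM image */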
1814         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1815                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1816         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1817
1818         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1819
1820         for (i = 0; i < fw_size; i++) {
1821                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1822                         msleep(1);
1823                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1824                                 le32_to_cpup(fw_data++));
1825         }
1826
1827         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1828
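        /* load the RLC LX6 DRAM image */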
1829         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1830                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1831         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1832
1833         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1834         for (i = 0; i < fw_size; i++) {
1835                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1836                         msleep(1);
1837                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1838                                 le32_to_cpup(fw_data++));
1839         }
1840
1841         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1842
1843         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1844         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1845         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1846         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1847 }
1848
1849 static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
1850 {
1851         const struct rlc_firmware_header_v2_0 *hdr;
1852         uint16_t version_major;
1853         uint16_t version_minor;
1854
1855         if (!adev->gfx.rlc_fw)
1856                 return -EINVAL;
1857
1858         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1859         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1860
1861         version_major = le16_to_cpu(hdr->header.header_version_major);
1862         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1863
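        /* v2 headers: the GPM ucode is always loaded; minor version >= 2 adds
         * the LX6 IRAM/DRAM images, loaded only when dpm is enabled
         */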
1864         if (version_major == 2) {
1865                 gfx_v12_0_load_rlcg_microcode(adev);
1866                 if (amdgpu_dpm == 1) {
1867                         if (version_minor >= 2)
1868                                 gfx_v12_0_load_rlc_iram_dram_microcode(adev);
1869                 }
1870
1871                 return 0;
1872         }
1873
1874         return -EINVAL;
1875 }
1876
1877 static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
1878 {
1879         int r;
1880
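        /* RLC ucode is loaded through PSP in this mode; only the CSB and SRM need to be set up here */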
1881         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1882                 gfx_v12_0_init_csb(adev);
1883
1884                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1885                         gfx_v12_0_rlc_enable_srm(adev);
1886         } else {
1887                 if (amdgpu_sriov_vf(adev)) {
1888                         gfx_v12_0_init_csb(adev);
1889                         return 0;
1890                 }
1891
1892                 adev->gfx.rlc.funcs->stop(adev);
1893
1894                 /* disable CG */
1895                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1896
1897                 /* disable PG */
1898                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1899
1900                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1901                         /* legacy rlc firmware loading */
1902                         r = gfx_v12_0_rlc_load_microcode(adev);
1903                         if (r)
1904                                 return r;
1905                 }
1906
1907                 gfx_v12_0_init_csb(adev);
1908
1909                 adev->gfx.rlc.funcs->start(adev);
1910         }
1911
1912         return 0;
1913 }
1914
1915 static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
1916 {
1917         const struct gfx_firmware_header_v2_0 *pfp_hdr;
1918         const struct gfx_firmware_header_v2_0 *me_hdr;
1919         const struct gfx_firmware_header_v2_0 *mec_hdr;
1920         uint32_t pipe_id, tmp;
1921
1922         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
1923                 adev->gfx.mec_fw->data;
1924         me_hdr = (const struct gfx_firmware_header_v2_0 *)
1925                 adev->gfx.me_fw->data;
1926         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
1927                 adev->gfx.pfp_fw->data;
1928
1929         /* config pfp program start addr */
1930         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
1931                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
1932                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
1933                         (pfp_hdr->ucode_start_addr_hi << 30) |
1934                         (pfp_hdr->ucode_start_addr_lo >> 2));
1935                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
1936                         pfp_hdr->ucode_start_addr_hi >> 2);
1937         }
1938         soc24_grbm_select(adev, 0, 0, 0, 0);
1939
1940         /* reset pfp pipe */
1941         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
1942         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
1943         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
1944         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1945
1946         /* clear pfp pipe reset */
1947         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
1948         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
1949         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1950
1951         /* config me program start addr */
1952         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
1953                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
1954                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
1955                         (me_hdr->ucode_start_addr_hi << 30) |
1956                         (me_hdr->ucode_start_addr_lo >> 2));
1957                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
1958                         me_hdr->ucode_start_addr_hi >> 2);
1959         }
1960         soc24_grbm_select(adev, 0, 0, 0, 0);
1961
1962         /* reset me pipe */
1963         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
1964         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
1965         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
1966         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1967
1968         /* clear me pipe reset */
1969         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
1970         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
1971         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
1972
1973         /* config mec program start addr */
1974         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
1975                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
1976                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
1977                                         mec_hdr->ucode_start_addr_lo >> 2 |
1978                                         mec_hdr->ucode_start_addr_hi << 30);
1979                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
1980                                         mec_hdr->ucode_start_addr_hi >> 2);
1981         }
1982         soc24_grbm_select(adev, 0, 0, 0, 0);
1983
1984         /* reset mec pipe */
1985         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
1986         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
1987         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
1988         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
1989         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
1990         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
1991
1992         /* clear mec pipe reset */
1993         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
1994         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
1995         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
1996         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
1997         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
1998 }
1999
2000 static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
2001 {
2002         const struct gfx_firmware_header_v2_0 *cp_hdr;
2003         unsigned pipe_id, tmp;
2004
2005         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2006                 adev->gfx.pfp_fw->data;
2007         mutex_lock(&adev->srbm_mutex);
2008         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2009                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2010                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2011                              (cp_hdr->ucode_start_addr_hi << 30) |
2012                              (cp_hdr->ucode_start_addr_lo >> 2));
2013                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2014                              cp_hdr->ucode_start_addr_hi >> 2);
2015
2016                 /*
2017                  * Program CP_ME_CNTL to reset given PIPE to take
2018                  * effect of CP_PFP_PRGRM_CNTR_START.
2019                  */
2020                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2021                 if (pipe_id == 0)
2022                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2023                                         PFP_PIPE0_RESET, 1);
2024                 else
2025                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2026                                         PFP_PIPE1_RESET, 1);
2027                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2028
2029                 /* Clear pfp pipe reset bit. */
2030                 if (pipe_id == 0)
2031                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2032                                         PFP_PIPE0_RESET, 0);
2033                 else
2034                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2035                                         PFP_PIPE1_RESET, 0);
2036                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2037         }
2038         soc24_grbm_select(adev, 0, 0, 0, 0);
2039         mutex_unlock(&adev->srbm_mutex);
2040 }
2041
2042 static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
2043 {
2044         const struct gfx_firmware_header_v2_0 *cp_hdr;
2045         unsigned pipe_id, tmp;
2046
2047         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2048                 adev->gfx.me_fw->data;
2049         mutex_lock(&adev->srbm_mutex);
2050         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2051                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2052                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2053                              (cp_hdr->ucode_start_addr_hi << 30) |
2054                              (cp_hdr->ucode_start_addr_lo >> 2));
2055                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2056                              cp_hdr->ucode_start_addr_hi >> 2);
2057
2058                 /*
2059                  * Program CP_ME_CNTL to reset given PIPE to take
2060                  * effect of CP_ME_PRGRM_CNTR_START.
2061                  */
2062                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2063                 if (pipe_id == 0)
2064                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2065                                         ME_PIPE0_RESET, 1);
2066                 else
2067                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2068                                         ME_PIPE1_RESET, 1);
2069                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2070
2071                 /* Clear me pipe reset bit. */
2072                 if (pipe_id == 0)
2073                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2074                                         ME_PIPE0_RESET, 0);
2075                 else
2076                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2077                                         ME_PIPE1_RESET, 0);
2078                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2079         }
2080         soc24_grbm_select(adev, 0, 0, 0, 0);
2081         mutex_unlock(&adev->srbm_mutex);
2082 }
2083
2084 static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
2085 {
2086         const struct gfx_firmware_header_v2_0 *cp_hdr;
2087         unsigned pipe_id;
2088
2089         cp_hdr = (const struct gfx_firmware_header_v2_0 *)
2090                 adev->gfx.mec_fw->data;
2091         mutex_lock(&adev->srbm_mutex);
2092         for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
2093                 soc24_grbm_select(adev, 1, pipe_id, 0, 0);
2094                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2095                              cp_hdr->ucode_start_addr_lo >> 2 |
2096                              cp_hdr->ucode_start_addr_hi << 30);
2097                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2098                              cp_hdr->ucode_start_addr_hi >> 2);
2099         }
2100         soc24_grbm_select(adev, 0, 0, 0, 0);
2101         mutex_unlock(&adev->srbm_mutex);
2102 }
2103
2104 static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2105 {
2106         uint32_t cp_status;
2107         uint32_t bootload_status;
2108         int i;
2109
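        /* wait for the CP to go idle and the RLC bootloader to report completion */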
2110         for (i = 0; i < adev->usec_timeout; i++) {
2111                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2112                 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2113
2114                 if ((cp_status == 0) &&
2115                     (REG_GET_FIELD(bootload_status,
2116                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2117                         break;
2118                 }
2119                 udelay(1);
2120                 if (amdgpu_emu_mode)
2121                         msleep(10);
2122         }
2123
2124         if (i >= adev->usec_timeout) {
2125                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2126                 return -ETIMEDOUT;
2127         }
2128
2129         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2130                 gfx_v12_0_set_pfp_ucode_start_addr(adev);
2131                 gfx_v12_0_set_me_ucode_start_addr(adev);
2132                 gfx_v12_0_set_mec_ucode_start_addr(adev);
2133         }
2134
2135         return 0;
2136 }
2137
2138 static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2139 {
2140         int i;
2141         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2142
2143         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2144         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2145         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2146
2147         for (i = 0; i < adev->usec_timeout; i++) {
2148                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2149                         break;
2150                 udelay(1);
2151         }
2152
2153         if (i >= adev->usec_timeout)
2154                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2155
2156         return 0;
2157 }
2158
2159 static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2160 {
2161         int r;
2162         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2163         const __le32 *fw_ucode, *fw_data;
2164         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2165         uint32_t tmp;
2166         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2167
2168         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2169                 adev->gfx.pfp_fw->data;
2170
2171         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2172
2173         /* instruction */
2174         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2175                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2176         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2177         /* data */
2178         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2179                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2180         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2181
2182         /* 64kb align */
2183         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2184                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2185                                       &adev->gfx.pfp.pfp_fw_obj,
2186                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2187                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2188         if (r) {
2189                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2190                 gfx_v12_0_pfp_fini(adev);
2191                 return r;
2192         }
2193
2194         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2195                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2196                                       &adev->gfx.pfp.pfp_fw_data_obj,
2197                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2198                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2199         if (r) {
2200                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2201                 gfx_v12_0_pfp_fini(adev);
2202                 return r;
2203         }
2204
2205         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2206         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2207
2208         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2209         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2210         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2211         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2212
2213         if (amdgpu_emu_mode == 1)
2214                 adev->hdp.funcs->flush_hdp(adev, NULL);
2215
2216         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2217                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2218         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2219                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2220
2221         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2222         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2223         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2224         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2225         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2226
2227         /*
2228          * Programming any of the CP_PFP_IC_BASE registers
2229          * forces invalidation of the PFP L1 I$. Wait for the
2230          * invalidation to complete.
2231          */
2232         for (i = 0; i < usec_timeout; i++) {
2233                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2234                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2235                         INVALIDATE_CACHE_COMPLETE))
2236                         break;
2237                 udelay(1);
2238         }
2239
2240         if (i >= usec_timeout) {
2241                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2242                 return -EINVAL;
2243         }
2244
2245         /* Prime the L1 instruction caches */
2246         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2247         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2248         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2249         /* Wait for the instruction cache to be primed */
2250         for (i = 0; i < usec_timeout; i++) {
2251                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2252                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2253                         ICACHE_PRIMED))
2254                         break;
2255                 udelay(1);
2256         }
2257
2258         if (i >= usec_timeout) {
2259                 dev_err(adev->dev, "failed to prime instruction cache\n");
2260                 return -EINVAL;
2261         }
2262
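        /* program the per-pipe RS64 data-cache base to the PFP data image */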
2263         mutex_lock(&adev->srbm_mutex);
2264         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2265                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2266
2267                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2268                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2269                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2270                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2271         }
2272         soc24_grbm_select(adev, 0, 0, 0, 0);
2273         mutex_unlock(&adev->srbm_mutex);
2274
2275         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2276         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2277         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2278         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2279
2280         /* Invalidate the data caches */
2281         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2282         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2283         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2284
2285         for (i = 0; i < usec_timeout; i++) {
2286                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2287                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2288                         INVALIDATE_DCACHE_COMPLETE))
2289                         break;
2290                 udelay(1);
2291         }
2292
2293         if (i >= usec_timeout) {
2294                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2295                 return -EINVAL;
2296         }
2297
2298         gfx_v12_0_set_pfp_ucode_start_addr(adev);
2299
2300         return 0;
2301 }
2302
2303 static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2304 {
2305         int r;
2306         const struct gfx_firmware_header_v2_0 *me_hdr;
2307         const __le32 *fw_ucode, *fw_data;
2308         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2309         uint32_t tmp;
2310         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2311
2312         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2313                 adev->gfx.me_fw->data;
2314
2315         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2316
2317         /* instruction */
2318         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2319                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2320         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2321         /* data */
2322         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2323                 le32_to_cpu(me_hdr->data_offset_bytes));
2324         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2325
2326         /* 64kb align */
2327         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2328                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2329                                       &adev->gfx.me.me_fw_obj,
2330                                       &adev->gfx.me.me_fw_gpu_addr,
2331                                       (void **)&adev->gfx.me.me_fw_ptr);
2332         if (r) {
2333                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2334                 gfx_v12_0_me_fini(adev);
2335                 return r;
2336         }
2337
2338         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2339                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2340                                       &adev->gfx.me.me_fw_data_obj,
2341                                       &adev->gfx.me.me_fw_data_gpu_addr,
2342                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2343         if (r) {
2344                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2345                 gfx_v12_0_pfp_fini(adev);
2346                 return r;
2347         }
2348
2349         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2350         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2351
2352         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2353         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2354         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2355         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2356
2357         if (amdgpu_emu_mode == 1)
2358                 adev->hdp.funcs->flush_hdp(adev, NULL);
2359
2360         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2361                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2362         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2363                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2364
2365         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2366         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2367         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2368         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2369         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2370
2371         /*
2372          * Programming any of the CP_ME_IC_BASE registers
2373          * forces invalidation of the ME L1 I$. Wait for the
2374          * invalidation to complete.
2375          */
2376         for (i = 0; i < usec_timeout; i++) {
2377                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2378                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2379                         INVALIDATE_CACHE_COMPLETE))
2380                         break;
2381                 udelay(1);
2382         }
2383
2384         if (i >= usec_timeout) {
2385                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2386                 return -EINVAL;
2387         }
2388
2389         /* Prime the instruction caches */
2390         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2391         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2392         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2393
2394         /* Wait for the instruction cache to be primed */
2395         for (i = 0; i < usec_timeout; i++) {
2396                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2397                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2398                         ICACHE_PRIMED))
2399                         break;
2400                 udelay(1);
2401         }
2402
2403         if (i >= usec_timeout) {
2404                 dev_err(adev->dev, "failed to prime instruction cache\n");
2405                 return -EINVAL;
2406         }
2407
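        /* program the per-pipe RS64 data-cache base to the ME data image */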
2408         mutex_lock(&adev->srbm_mutex);
2409         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2410                 soc24_grbm_select(adev, 0, pipe_id, 0, 0);
2411
2412                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2413                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2414                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2415                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2416         }
2417         soc24_grbm_select(adev, 0, 0, 0, 0);
2418         mutex_unlock(&adev->srbm_mutex);
2419
2420         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2421         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2422         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2423         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2424
2425         /* Invalidate the data caches */
2426         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2427         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2428         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2429
2430         for (i = 0; i < usec_timeout; i++) {
2431                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2432                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2433                         INVALIDATE_DCACHE_COMPLETE))
2434                         break;
2435                 udelay(1);
2436         }
2437
2438         if (i >= usec_timeout) {
2439                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2440                 return -EINVAL;
2441         }
2442
2443         gfx_v12_0_set_me_ucode_start_addr(adev);
2444
2445         return 0;
2446 }
2447
2448 static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2449 {
2450         int r;
2451
2452         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
2453                 return -EINVAL;
2454
2455         gfx_v12_0_cp_gfx_enable(adev, false);
2456
2457         r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
2458         if (r) {
2459                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
2460                 return r;
2461         }
2462
2463         r = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
2464         if (r) {
2465                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
2466                 return r;
2467         }
2468
2469         return 0;
2470 }
2471
2472 static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
2473 {
2474         /* init the CP */
2475         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
2476                      adev->gfx.config.max_hw_contexts - 1);
2477         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
2478
2479         if (!amdgpu_async_gfx_ring)
2480                 gfx_v12_0_cp_gfx_enable(adev, true);
2481
2482         return 0;
2483 }
2484
2485 static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
2486                                          CP_PIPE_ID pipe)
2487 {
2488         u32 tmp;
2489
2490         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
2491         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
2492
2493         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
2494 }
2495
2496 static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
2497                                           struct amdgpu_ring *ring)
2498 {
2499         u32 tmp;
2500
2501         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2502         if (ring->use_doorbell) {
2503                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2504                                     DOORBELL_OFFSET, ring->doorbell_index);
2505                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2506                                     DOORBELL_EN, 1);
2507         } else {
2508                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2509                                     DOORBELL_EN, 0);
2510         }
2511         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
2512
2513         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2514                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
2515         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
2516
2517         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2518                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2519 }
2520
2521 static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
2522 {
2523         struct amdgpu_ring *ring;
2524         u32 tmp;
2525         u32 rb_bufsz;
2526         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2527         u32 i;
2528
2529         /* Set the write pointer delay */
2530         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
2531
2532         /* set the RB to use vmid 0 */
2533         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
2534
2535         /* Init gfx ring 0 for pipe 0 */
2536         mutex_lock(&adev->srbm_mutex);
2537         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2538
2539         /* Set ring buffer size */
2540         ring = &adev->gfx.gfx_ring[0];
2541         rb_bufsz = order_base_2(ring->ring_size / 8);
2542         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2543         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2544         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2545
2546         /* Initialize the ring buffer's write pointers */
2547         ring->wptr = 0;
2548         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
2549         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2550
2551         /* set the wb address whether it's enabled or not */
2552         rptr_addr = ring->rptr_gpu_addr;
2553         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2554         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
2555                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2556
2557         wptr_gpu_addr = ring->wptr_gpu_addr;
2558         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
2559                      lower_32_bits(wptr_gpu_addr));
2560         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
2561                      upper_32_bits(wptr_gpu_addr));
2562
2563         mdelay(1);
2564         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
2565
2566         rb_addr = ring->gpu_addr >> 8;
2567         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
2568         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2569
2570         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
2571
2572         gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
2573         mutex_unlock(&adev->srbm_mutex);
2574
2575         /* Switch to pipe 0 */
2576         mutex_lock(&adev->srbm_mutex);
2577         gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2578         mutex_unlock(&adev->srbm_mutex);
2579
2580         /* start the ring */
2581         gfx_v12_0_cp_gfx_start(adev);
2582
2583         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2584                 ring = &adev->gfx.gfx_ring[i];
2585                 ring->sched.ready = true;
2586         }
2587
2588         return 0;
2589 }
2590
2591 static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2592 {
2593         u32 data;
2594
2595         data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2596         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
2597                                                  enable ? 0 : 1);
2598         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
2599                                                  enable ? 0 : 1);
2600         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
2601                                                  enable ? 0 : 1);
2602         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
2603                                                  enable ? 0 : 1);
2604         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
2605                                                  enable ? 0 : 1);
2606         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
2607                                                  enable ? 1 : 0);
2608         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
2609                                                  enable ? 1 : 0);
2610         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
2611                                                  enable ? 1 : 0);
2612         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
2613                                                  enable ? 1 : 0);
2614         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
2615                                                  enable ? 0 : 1);
2616         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
2617
2618         adev->gfx.kiq[0].ring.sched.ready = enable;
2619
2620         udelay(50);
2621 }
2622
2623 static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
2624 {
2625         const struct gfx_firmware_header_v2_0 *mec_hdr;
2626         const __le32 *fw_ucode, *fw_data;
2627         u32 tmp, fw_ucode_size, fw_data_size;
2628         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
2629         u32 *fw_ucode_ptr, *fw_data_ptr;
2630         int r;
2631
2632         if (!adev->gfx.mec_fw)
2633                 return -EINVAL;
2634
2635         gfx_v12_0_cp_compute_enable(adev, false);
2636
2637         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
2638         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2639
2640         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
2641                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
2642         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
2643
2644         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
2645                                 le32_to_cpu(mec_hdr->data_offset_bytes));
2646         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
2647
2648         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2649                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2650                                       &adev->gfx.mec.mec_fw_obj,
2651                                       &adev->gfx.mec.mec_fw_gpu_addr,
2652                                       (void **)&fw_ucode_ptr);
2653         if (r) {
2654                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
2655                 gfx_v12_0_mec_fini(adev);
2656                 return r;
2657         }
2658
2659         r = amdgpu_bo_create_reserved(adev,
2660                                       ALIGN(fw_data_size, 64 * 1024) *
2661                                       adev->gfx.mec.num_pipe_per_mec,
2662                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2663                                       &adev->gfx.mec.mec_fw_data_obj,
2664                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
2665                                       (void **)&fw_data_ptr);
2666         if (r) {
2667                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
2668                 gfx_v12_0_mec_fini(adev);
2669                 return r;
2670         }
2671
2672         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
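             /* give each MEC pipe its own 64K-aligned copy of the firmware data segment */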
2673         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2674                 memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
2675         }
2676
2677         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2678         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
2679         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2680         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
2681
2682         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2683         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2684         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2685         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2686         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2687
2688         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2689         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2690         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2691         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2692
2693         mutex_lock(&adev->srbm_mutex);
2694         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2695                 soc24_grbm_select(adev, 1, i, 0, 0);
2696
2697                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
2698                              lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2699                                            i * ALIGN(fw_data_size, 64 * 1024)));
2700                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2701                              upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2702                                            i * ALIGN(fw_data_size, 64 * 1024)));
2703
2704                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2705                              lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2706                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2707                              upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2708         }
2709         mutex_unlock(&adev->srbm_mutex);
2710         soc24_grbm_select(adev, 0, 0, 0, 0);
2711
2712         /* Trigger an invalidation of the L1 data caches */
2713         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2714         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2715         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2716
2717         /* Wait for invalidation complete */
2718         for (i = 0; i < usec_timeout; i++) {
2719                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2720                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2721                                        INVALIDATE_DCACHE_COMPLETE))
2722                         break;
2723                 udelay(1);
2724         }
2725
2726         if (i >= usec_timeout) {
2727                 dev_err(adev->dev, "failed to invalidate data cache\n");
2728                 return -EINVAL;
2729         }
2730
2731         /* Trigger an invalidation of the L1 instruction caches */
2732         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2733         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2734         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2735
2736         /* Wait for invalidation complete */
2737         for (i = 0; i < usec_timeout; i++) {
2738                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2739                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2740                                        INVALIDATE_CACHE_COMPLETE))
2741                         break;
2742                 udelay(1);
2743         }
2744
2745         if (i >= usec_timeout) {
2746                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2747                 return -EINVAL;
2748         }
2749
2750         gfx_v12_0_set_mec_ucode_start_addr(adev);
2751
2752         return 0;
2753 }
2754
2755 static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
2756 {
2757         uint32_t tmp;
2758         struct amdgpu_device *adev = ring->adev;
2759
2760         /* tell RLC which queue is the KIQ */
2761         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
2762         tmp &= 0xffffff00;
2763         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2764         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
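             /* a second write below also sets bit 0x80, assumed to mark the KIQ as active */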
2765         tmp |= 0x80;
2766         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
2767 }
2768
2769 static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
2770 {
2771         /* set graphics engine doorbell range */
2772         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
2773                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
2774         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
2775                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
2776
2777         /* set compute engine doorbell range */
2778         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
2779                      (adev->doorbell_index.kiq * 2) << 2);
2780         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
2781                      (adev->doorbell_index.userqueue_end * 2) << 2);
2782 }
2783
2784 static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
2785                                   struct amdgpu_mqd_prop *prop)
2786 {
2787         struct v12_gfx_mqd *mqd = m;
2788         uint64_t hqd_gpu_addr, wb_gpu_addr;
2789         uint32_t tmp;
2790         uint32_t rb_bufsz;
2791
2792         /* set up gfx hqd wptr */
2793         mqd->cp_gfx_hqd_wptr = 0;
2794         mqd->cp_gfx_hqd_wptr_hi = 0;
2795
2796         /* set the pointer to the MQD */
2797         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
2798         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2799
2800         /* set up mqd control */
2801         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
2802         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
2803         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
2804         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
2805         mqd->cp_gfx_mqd_control = tmp;
2806
2807         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
2808         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
2809         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
2810         mqd->cp_gfx_hqd_vmid = 0;
2811
2812         /* set up default queue priority level
2813          * 0x0 = low priority, 0x1 = high priority */
2814         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
2815         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
2816         mqd->cp_gfx_hqd_queue_priority = tmp;
2817
2818         /* set up time quantum */
2819         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
2820         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
2821         mqd->cp_gfx_hqd_quantum = tmp;
2822
2823         /* set up gfx hqd base. this is similar to CP_RB_BASE */
2824         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2825         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
2826         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
2827
2828         /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
2829         wb_gpu_addr = prop->rptr_gpu_addr;
2830         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
2831         mqd->cp_gfx_hqd_rptr_addr_hi =
2832                 upper_32_bits(wb_gpu_addr) & 0xffff;
2833
2834         /* set up rb_wptr_poll addr */
2835         wb_gpu_addr = prop->wptr_gpu_addr;
2836         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2837         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2838
2839         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
2840         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
2841         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
2842         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
2843         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
2844 #ifdef __BIG_ENDIAN
2845         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
2846 #endif
2847         mqd->cp_gfx_hqd_cntl = tmp;
2848
2849         /* set up cp_doorbell_control */
2850         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
2851         if (prop->use_doorbell) {
2852                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2853                                     DOORBELL_OFFSET, prop->doorbell_index);
2854                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2855                                     DOORBELL_EN, 1);
2856         } else
2857                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2858                                     DOORBELL_EN, 0);
2859         mqd->cp_rb_doorbell_control = tmp;
2860
2861         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2862         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
2863
2864         /* activate the queue */
2865         mqd->cp_gfx_hqd_active = 1;
2866
2867         return 0;
2868 }
2869
2870 static int gfx_v12_0_gfx_init_queue(struct amdgpu_ring *ring)
2871 {
2872         struct amdgpu_device *adev = ring->adev;
2873         struct v12_gfx_mqd *mqd = ring->mqd_ptr;
2874         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
2875
2876         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
2877                 memset((void *)mqd, 0, sizeof(*mqd));
2878                 mutex_lock(&adev->srbm_mutex);
2879                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2880                 amdgpu_ring_init_mqd(ring);
2881                 soc24_grbm_select(adev, 0, 0, 0, 0);
2882                 mutex_unlock(&adev->srbm_mutex);
2883                 if (adev->gfx.me.mqd_backup[mqd_idx])
2884                         memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
2885         } else {
2886                 /* restore mqd with the backup copy */
2887                 if (adev->gfx.me.mqd_backup[mqd_idx])
2888                         memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
2889                 /* reset the ring */
2890                 ring->wptr = 0;
2891                 *ring->wptr_cpu_addr = 0;
2892                 amdgpu_ring_clear_ring(ring);
2893         }
2894
2895         return 0;
2896 }
2897
2898 static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
2899 {
2900         int r, i;
2901         struct amdgpu_ring *ring;
2902
2903         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2904                 ring = &adev->gfx.gfx_ring[i];
2905
2906                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2907                 if (unlikely(r != 0))
2908                         goto done;
2909
2910                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2911                 if (!r) {
2912                         r = gfx_v12_0_gfx_init_queue(ring);
2913                         amdgpu_bo_kunmap(ring->mqd_obj);
2914                         ring->mqd_ptr = NULL;
2915                 }
2916                 amdgpu_bo_unreserve(ring->mqd_obj);
2917                 if (r)
2918                         goto done;
2919         }
2920
2921         r = amdgpu_gfx_enable_kgq(adev, 0);
2922         if (r)
2923                 goto done;
2924
2925         r = gfx_v12_0_cp_gfx_start(adev);
2926         if (r)
2927                 goto done;
2928
2929         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2930                 ring = &adev->gfx.gfx_ring[i];
2931                 ring->sched.ready = true;
2932         }
2933 done:
2934         return r;
2935 }
2936
2937 static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
2938                                       struct amdgpu_mqd_prop *prop)
2939 {
2940         struct v12_compute_mqd *mqd = m;
2941         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2942         uint32_t tmp;
2943
2944         mqd->header = 0xC0310800;
2945         mqd->compute_pipelinestat_enable = 0x00000001;
2946         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2947         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2948         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2949         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2950         mqd->compute_misc_reserved = 0x00000007;
2951
2952         eop_base_addr = prop->eop_gpu_addr >> 8;
2953         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2954         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2955
2956         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2957         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
2958         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2959                         (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
2960
2961         mqd->cp_hqd_eop_control = tmp;
2962
2963         /* enable doorbell? */
2964         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
2965
2966         if (prop->use_doorbell) {
2967                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2968                                     DOORBELL_OFFSET, prop->doorbell_index);
2969                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2970                                     DOORBELL_EN, 1);
2971                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2972                                     DOORBELL_SOURCE, 0);
2973                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2974                                     DOORBELL_HIT, 0);
2975         } else {
2976                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2977                                     DOORBELL_EN, 0);
2978         }
2979
2980         mqd->cp_hqd_pq_doorbell_control = tmp;
2981
2982         /* disable the queue if it's active */
2983         mqd->cp_hqd_dequeue_request = 0;
2984         mqd->cp_hqd_pq_rptr = 0;
2985         mqd->cp_hqd_pq_wptr_lo = 0;
2986         mqd->cp_hqd_pq_wptr_hi = 0;
2987
2988         /* set the pointer to the MQD */
2989         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
2990         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2991
2992         /* set MQD vmid to 0 */
2993         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
2994         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2995         mqd->cp_mqd_control = tmp;
2996
2997         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2998         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2999         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3000         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3001
3002         /* set up the HQD, this is similar to CP_RB0_CNTL */
3003         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3004         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3005                             (order_base_2(prop->queue_size / 4) - 1));
3006         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3007                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3008         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3009         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3010         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3011         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3012         mqd->cp_hqd_pq_control = tmp;
3013
3014         /* set the wb address whether it's enabled or not */
3015         wb_gpu_addr = prop->rptr_gpu_addr;
3016         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3017         mqd->cp_hqd_pq_rptr_report_addr_hi =
3018                 upper_32_bits(wb_gpu_addr) & 0xffff;
3019
3020         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3021         wb_gpu_addr = prop->wptr_gpu_addr;
3022         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3023         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3024
3025         tmp = 0;
3026         /* enable the doorbell if requested */
3027         if (prop->use_doorbell) {
3028                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3029                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3030                                 DOORBELL_OFFSET, prop->doorbell_index);
3031
3032                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3033                                     DOORBELL_EN, 1);
3034                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3035                                     DOORBELL_SOURCE, 0);
3036                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3037                                     DOORBELL_HIT, 0);
3038         }
3039
3040         mqd->cp_hqd_pq_doorbell_control = tmp;
3041
3042         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3043         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3044
3045         /* set the vmid for the queue */
3046         mqd->cp_hqd_vmid = 0;
3047
3048         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3049         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3050         mqd->cp_hqd_persistent_state = tmp;
3051
3052         /* set MIN_IB_AVAIL_SIZE */
3053         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3054         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3055         mqd->cp_hqd_ib_control = tmp;
3056
3057         /* set static priority for a compute queue/ring */
3058         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3059         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3060
3061         mqd->cp_hqd_active = prop->hqd_active;
3062
3063         return 0;
3064 }
3065
3066 static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
3067 {
3068         struct amdgpu_device *adev = ring->adev;
3069         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3070         int j;
3071
3072         /* deactivate the queue */
3073         if (amdgpu_sriov_vf(adev))
3074                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3075
3076         /* disable wptr polling */
3077         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3078
3079         /* write the EOP addr */
3080         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3081                mqd->cp_hqd_eop_base_addr_lo);
3082         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3083                mqd->cp_hqd_eop_base_addr_hi);
3084
3085         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3086         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3087                mqd->cp_hqd_eop_control);
3088
3089         /* enable doorbell? */
3090         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3091                mqd->cp_hqd_pq_doorbell_control);
3092
3093         /* disable the queue if it's active */
3094         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3095                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3096                 for (j = 0; j < adev->usec_timeout; j++) {
3097                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3098                                 break;
3099                         udelay(1);
3100                 }
3101                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3102                        mqd->cp_hqd_dequeue_request);
3103                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3104                        mqd->cp_hqd_pq_rptr);
3105                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3106                        mqd->cp_hqd_pq_wptr_lo);
3107                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3108                        mqd->cp_hqd_pq_wptr_hi);
3109         }
3110
3111         /* set the pointer to the MQD */
3112         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3113                mqd->cp_mqd_base_addr_lo);
3114         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3115                mqd->cp_mqd_base_addr_hi);
3116
3117         /* set MQD vmid to 0 */
3118         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3119                mqd->cp_mqd_control);
3120
3121         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3122         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3123                mqd->cp_hqd_pq_base_lo);
3124         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3125                mqd->cp_hqd_pq_base_hi);
3126
3127         /* set up the HQD, this is similar to CP_RB0_CNTL */
3128         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3129                mqd->cp_hqd_pq_control);
3130
3131         /* set the wb address whether it's enabled or not */
3132         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3133                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3134         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3135                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3136
3137         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3138         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3139                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3140         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3141                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3142
3143         /* enable the doorbell if requested */
3144         if (ring->use_doorbell) {
3145                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3146                         (adev->doorbell_index.kiq * 2) << 2);
3147                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3148                         (adev->doorbell_index.userqueue_end * 2) << 2);
3149         }
3150
3151         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3152                mqd->cp_hqd_pq_doorbell_control);
3153
3154         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3155         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3156                mqd->cp_hqd_pq_wptr_lo);
3157         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3158                mqd->cp_hqd_pq_wptr_hi);
3159
3160         /* set the vmid for the queue */
3161         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3162
3163         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3164                mqd->cp_hqd_persistent_state);
3165
3166         /* activate the queue */
3167         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3168                mqd->cp_hqd_active);
3169
3170         if (ring->use_doorbell)
3171                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3172
3173         return 0;
3174 }
3175
3176 static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
3177 {
3178         struct amdgpu_device *adev = ring->adev;
3179         struct v12_compute_mqd *mqd = ring->mqd_ptr;
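             /* the KIQ backup MQD uses the slot just past the last compute ring */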
3180         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3181
3182         gfx_v12_0_kiq_setting(ring);
3183
3184         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3185                 /* reset MQD to a clean status */
3186                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3187                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3188
3189                 /* reset ring buffer */
3190                 ring->wptr = 0;
3191                 amdgpu_ring_clear_ring(ring);
3192
3193                 mutex_lock(&adev->srbm_mutex);
3194                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3195                 gfx_v12_0_kiq_init_register(ring);
3196                 soc24_grbm_select(adev, 0, 0, 0, 0);
3197                 mutex_unlock(&adev->srbm_mutex);
3198         } else {
3199                 memset((void *)mqd, 0, sizeof(*mqd));
3200                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3201                         amdgpu_ring_clear_ring(ring);
3202                 mutex_lock(&adev->srbm_mutex);
3203                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3204                 amdgpu_ring_init_mqd(ring);
3205                 gfx_v12_0_kiq_init_register(ring);
3206                 soc24_grbm_select(adev, 0, 0, 0, 0);
3207                 mutex_unlock(&adev->srbm_mutex);
3208
3209                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3210                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3211         }
3212
3213         return 0;
3214 }
3215
3216 static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring)
3217 {
3218         struct amdgpu_device *adev = ring->adev;
3219         struct v12_compute_mqd *mqd = ring->mqd_ptr;
3220         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3221
3222         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3223                 memset((void *)mqd, 0, sizeof(*mqd));
3224                 mutex_lock(&adev->srbm_mutex);
3225                 soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3226                 amdgpu_ring_init_mqd(ring);
3227                 soc24_grbm_select(adev, 0, 0, 0, 0);
3228                 mutex_unlock(&adev->srbm_mutex);
3229
3230                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3231                         memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3232         } else {
3233                 /* restore MQD to a clean status */
3234                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3235                         memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3236                 /* reset ring buffer */
3237                 ring->wptr = 0;
3238                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3239                 amdgpu_ring_clear_ring(ring);
3240         }
3241
3242         return 0;
3243 }
3244
3245 static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
3246 {
3247         struct amdgpu_ring *ring;
3248         int r;
3249
3250         ring = &adev->gfx.kiq[0].ring;
3251
3252         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3253         if (unlikely(r != 0))
3254                 return r;
3255
3256         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3257         if (unlikely(r != 0)) {
3258                 amdgpu_bo_unreserve(ring->mqd_obj);
3259                 return r;
3260         }
3261
3262         gfx_v12_0_kiq_init_queue(ring);
3263         amdgpu_bo_kunmap(ring->mqd_obj);
3264         ring->mqd_ptr = NULL;
3265         amdgpu_bo_unreserve(ring->mqd_obj);
3266         ring->sched.ready = true;
3267         return 0;
3268 }
3269
3270 static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
3271 {
3272         struct amdgpu_ring *ring = NULL;
3273         int r = 0, i;
3274
3275         if (!amdgpu_async_gfx_ring)
3276                 gfx_v12_0_cp_compute_enable(adev, true);
3277
3278         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3279                 ring = &adev->gfx.compute_ring[i];
3280
3281                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3282                 if (unlikely(r != 0))
3283                         goto done;
3284                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3285                 if (!r) {
3286                         r = gfx_v12_0_kcq_init_queue(ring);
3287                         amdgpu_bo_kunmap(ring->mqd_obj);
3288                         ring->mqd_ptr = NULL;
3289                 }
3290                 amdgpu_bo_unreserve(ring->mqd_obj);
3291                 if (r)
3292                         goto done;
3293         }
3294
3295         r = amdgpu_gfx_enable_kcq(adev, 0);
3296 done:
3297         return r;
3298 }
3299
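     /*
      * CP bring-up: load the gfx and compute microcode when direct loading is
      * used, program the doorbell ranges, enable the pipes, then resume the
      * KIQ, KCQ and gfx rings and ring-test each of them.
      */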
3300 static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
3301 {
3302         int r, i;
3303         struct amdgpu_ring *ring;
3304
3305         if (!(adev->flags & AMD_IS_APU))
3306                 gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3307
3308         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3309                 /* legacy firmware loading */
3310                 r = gfx_v12_0_cp_gfx_load_microcode(adev);
3311                 if (r)
3312                         return r;
3313
3314                 r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
3315                 if (r)
3316                         return r;
3317         }
3318
3319         gfx_v12_0_cp_set_doorbell_range(adev);
3320
3321         if (amdgpu_async_gfx_ring) {
3322                 gfx_v12_0_cp_compute_enable(adev, true);
3323                 gfx_v12_0_cp_gfx_enable(adev, true);
3324         }
3325
3326         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
3327                 r = amdgpu_mes_kiq_hw_init(adev);
3328         else
3329                 r = gfx_v12_0_kiq_resume(adev);
3330         if (r)
3331                 return r;
3332
3333         r = gfx_v12_0_kcq_resume(adev);
3334         if (r)
3335                 return r;
3336
3337         if (!amdgpu_async_gfx_ring) {
3338                 r = gfx_v12_0_cp_gfx_resume(adev);
3339                 if (r)
3340                         return r;
3341         } else {
3342                 r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
3343                 if (r)
3344                         return r;
3345         }
3346
3347         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3348                 ring = &adev->gfx.gfx_ring[i];
3349                 r = amdgpu_ring_test_helper(ring);
3350                 if (r)
3351                         return r;
3352         }
3353
3354         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3355                 ring = &adev->gfx.compute_ring[i];
3356                 r = amdgpu_ring_test_helper(ring);
3357                 if (r)
3358                         return r;
3359         }
3360
3361         return 0;
3362 }
3363
3364 static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
3365 {
3366         gfx_v12_0_cp_gfx_enable(adev, enable);
3367         gfx_v12_0_cp_compute_enable(adev, enable);
3368 }
3369
3370 static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
3371 {
3372         int r;
3373         bool value;
3374
3375         r = adev->gfxhub.funcs->gart_enable(adev);
3376         if (r)
3377                 return r;
3378
3379         adev->hdp.funcs->flush_hdp(adev, NULL);
3380
3381         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
3382                 false : true;
3383
3384         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
3385         /* TODO investigate why this and the hdp flush above are needed,
3386          * are we missing a flush somewhere else? */
3387         adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
3388
3389         return 0;
3390 }
3391
3392 static int get_gb_addr_config(struct amdgpu_device *adev)
3393 {
3394         u32 gb_addr_config;
3395
3396         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
3397         if (gb_addr_config == 0)
3398                 return -EINVAL;
3399
3400         adev->gfx.config.gb_addr_config_fields.num_pkrs =
3401                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
3402
3403         adev->gfx.config.gb_addr_config = gb_addr_config;
3404
3405         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
3406                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3407                                       GB_ADDR_CONFIG, NUM_PIPES);
3408
3409         adev->gfx.config.max_tile_pipes =
3410                 adev->gfx.config.gb_addr_config_fields.num_pipes;
3411
3412         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
3413                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3414                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
3415         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
3416                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3417                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
3418         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
3419                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3420                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
3421         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
3422                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
3423                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
3424
3425         return 0;
3426 }
3427
3428 static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
3429 {
3430         uint32_t data;
3431
3432         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
3433         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
3434         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
3435
3436         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
3437         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
3438         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
3439 }
3440
3441 static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
3442 {
3443         if (amdgpu_sriov_vf(adev))
3444                 return;
3445
3446         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3447         case IP_VERSION(12, 0, 0):
3448         case IP_VERSION(12, 0, 1):
3449                 if (adev->rev_id == 0)
3450                         soc15_program_register_sequence(adev,
3451                                         golden_settings_gc_12_0,
3452                                         (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
3453                 break;
3454         default:
3455                 break;
3456         }
3457 }
3458
3459 static int gfx_v12_0_hw_init(void *handle)
3460 {
3461         int r;
3462         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3463
3464         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
3465                 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3466                         /* RLC autoload sequence 1: Program rlc ram */
3467                         if (adev->gfx.imu.funcs->program_rlc_ram)
3468                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
3469                 }
3470                 /* rlc autoload firmware */
3471                 r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
3472                 if (r)
3473                         return r;
3474         } else {
3475                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3476                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
3477                                 if (adev->gfx.imu.funcs->load_microcode)
3478                                         adev->gfx.imu.funcs->load_microcode(adev);
3479                                 if (adev->gfx.imu.funcs->setup_imu)
3480                                         adev->gfx.imu.funcs->setup_imu(adev);
3481                                 if (adev->gfx.imu.funcs->start_imu)
3482                                         adev->gfx.imu.funcs->start_imu(adev);
3483                         }
3484
3485                         /* disable gpa mode in backdoor loading */
3486                         gfx_v12_0_disable_gpa_mode(adev);
3487                 }
3488         }
3489
3490         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
3491             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
3492                 r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
3493                 if (r) {
3494                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
3495                         return r;
3496                 }
3497         }
3498
3499         if (!amdgpu_emu_mode)
3500                 gfx_v12_0_init_golden_registers(adev);
3501
3502         adev->gfx.is_poweron = true;
3503
3504         if (get_gb_addr_config(adev))
3505                 DRM_WARN("Invalid gb_addr_config !\n");
3506
3507         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
3508                 gfx_v12_0_config_gfx_rs64(adev);
3509
3510         r = gfx_v12_0_gfxhub_enable(adev);
3511         if (r)
3512                 return r;
3513
3514         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
3515              adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
3516              (amdgpu_dpm == 1)) {
3517                 /*
3518                  * For gfx 12, RLC firmware loading relies on the SMU firmware
3519                  * having been loaded first, so in direct type the SMC ucode has
3520                  * to be loaded here before the RLC.
3521                  */
3522                 r = amdgpu_pm_load_smu_firmware(adev, NULL);
3523                 if (r)
3524                         return r;
3525         }
3526
3527         gfx_v12_0_constants_init(adev);
3528
3529         if (adev->nbio.funcs->gc_doorbell_init)
3530                 adev->nbio.funcs->gc_doorbell_init(adev);
3531
3532         r = gfx_v12_0_rlc_resume(adev);
3533         if (r)
3534                 return r;
3535
3536         /*
3537          * golden register init and rlc resume may override some registers,
3538          * so reconfigure them here
3539          */
3540         gfx_v12_0_tcp_harvest(adev);
3541
3542         r = gfx_v12_0_cp_resume(adev);
3543         if (r)
3544                 return r;
3545
3546         return r;
3547 }
3548
3549 static int gfx_v12_0_hw_fini(void *handle)
3550 {
3551         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3552         uint32_t tmp;
3553
3554         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3555         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3556
3557         if (!adev->no_hw_access) {
3558                 if (amdgpu_async_gfx_ring) {
3559                         if (amdgpu_gfx_disable_kgq(adev, 0))
3560                                 DRM_ERROR("KGQ disable failed\n");
3561                 }
3562
3563                 if (amdgpu_gfx_disable_kcq(adev, 0))
3564                         DRM_ERROR("KCQ disable failed\n");
3565
3566                 amdgpu_mes_kiq_hw_fini(adev);
3567         }
3568
3569         if (amdgpu_sriov_vf(adev)) {
3570                 gfx_v12_0_cp_gfx_enable(adev, false);
3571                 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
3572                 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3573                 tmp &= 0xffffff00;
3574                 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3575
3576                 return 0;
3577         }
3578         gfx_v12_0_cp_enable(adev, false);
3579         gfx_v12_0_enable_gui_idle_interrupt(adev, false);
3580
3581         adev->gfxhub.funcs->gart_disable(adev);
3582
3583         adev->gfx.is_poweron = false;
3584
3585         return 0;
3586 }
3587
3588 static int gfx_v12_0_suspend(void *handle)
3589 {
3590         return gfx_v12_0_hw_fini(handle);
3591 }
3592
3593 static int gfx_v12_0_resume(void *handle)
3594 {
3595         return gfx_v12_0_hw_init(handle);
3596 }
3597
3598 static bool gfx_v12_0_is_idle(void *handle)
3599 {
3600         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3601
3602         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
3603                                 GRBM_STATUS, GUI_ACTIVE))
3604                 return false;
3605         else
3606                 return true;
3607 }
3608
3609 static int gfx_v12_0_wait_for_idle(void *handle)
3610 {
3611         unsigned i;
3612         u32 tmp;
3613         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3614
3615         for (i = 0; i < adev->usec_timeout; i++) {
3616                 /* read GRBM_STATUS */
3617                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
3618                         GRBM_STATUS__GUI_ACTIVE_MASK;
3619
3620                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3621                         return 0;
3622                 udelay(1);
3623         }
3624         return -ETIMEDOUT;
3625 }
3626
3627 static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3628 {
3629         uint64_t clock = 0;
3630
3631         if (adev->smuio.funcs &&
3632             adev->smuio.funcs->get_gpu_clock_counter)
3633                 clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
3634         else
3635                 dev_warn(adev->dev, "query gpu clock counter is not supported\n");
3636
3637         return clock;
3638 }
3639
3640 static int gfx_v12_0_early_init(void *handle)
3641 {
3642         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3643
3644         adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
3645
3646         adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
3647         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
3648                                           AMDGPU_MAX_COMPUTE_RINGS);
3649
3650         gfx_v12_0_set_kiq_pm4_funcs(adev);
3651         gfx_v12_0_set_ring_funcs(adev);
3652         gfx_v12_0_set_irq_funcs(adev);
3653         gfx_v12_0_set_rlc_funcs(adev);
3654         gfx_v12_0_set_mqd_funcs(adev);
3655         gfx_v12_0_set_imu_funcs(adev);
3656
3657         gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
3658
3659         return gfx_v12_0_init_microcode(adev);
3660 }
3661
3662 static int gfx_v12_0_late_init(void *handle)
3663 {
3664         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3665         int r;
3666
3667         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3668         if (r)
3669                 return r;
3670
3671         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3672         if (r)
3673                 return r;
3674
3675         return 0;
3676 }
3677
3678 static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
3679 {
3680         uint32_t rlc_cntl;
3681
3682         /* if RLC is not enabled, do nothing */
3683         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
3684         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
3685 }
3686
3687 static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
3688                                     int xcc_id)
3689 {
3690         uint32_t data;
3691         unsigned i;
3692
3693         data = RLC_SAFE_MODE__CMD_MASK;
3694         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3695
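             /* request safe-mode entry (MESSAGE=1); the RLC clears CMD once it has acked */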
3696         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
3697
3698         /* wait for RLC_SAFE_MODE */
3699         for (i = 0; i < adev->usec_timeout; i++) {
3700                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
3701                                    RLC_SAFE_MODE, CMD))
3702                         break;
3703                 udelay(1);
3704         }
3705 }
3706
3707 static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
3708                                       int xcc_id)
3709 {
3710         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
3711 }
3712
3713 static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
3714                                       bool enable)
3715 {
3716         uint32_t def, data;
3717
3718         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
3719                 return;
3720
3721         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3722
3723         if (enable)
3724                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3725         else
3726                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3727
3728         if (def != data)
3729                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3730 }
3731
3732 static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
3733                                       struct amdgpu_ring *ring,
3734                                       unsigned vmid)
3735 {
3736         u32 reg, data;
3737
3738         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
3739         if (amdgpu_sriov_is_pp_one_vf(adev))
3740                 data = RREG32_NO_KIQ(reg);
3741         else
3742                 data = RREG32(reg);
3743
3744         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
3745         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
3746
3747         if (amdgpu_sriov_is_pp_one_vf(adev))
3748                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
3749         else
3750                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
3751
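             /* for one-VF SR-IOV, also emit the same RLC_SPM_MC_CNTL write on gfx/compute rings */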
3752         if (ring
3753             && amdgpu_sriov_is_pp_one_vf(adev)
3754             && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
3755                 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
3756                 uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
3757                 amdgpu_ring_emit_wreg(ring, reg, data);
3758         }
3759 }
3760
3761 static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
3762         .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
3763         .set_safe_mode = gfx_v12_0_set_safe_mode,
3764         .unset_safe_mode = gfx_v12_0_unset_safe_mode,
3765         .init = gfx_v12_0_rlc_init,
3766         .get_csb_size = gfx_v12_0_get_csb_size,
3767         .get_csb_buffer = gfx_v12_0_get_csb_buffer,
3768         .resume = gfx_v12_0_rlc_resume,
3769         .stop = gfx_v12_0_rlc_stop,
3770         .reset = gfx_v12_0_rlc_reset,
3771         .start = gfx_v12_0_rlc_start,
3772         .update_spm_vmid = gfx_v12_0_update_spm_vmid,
3773 };
3774
3775 #if 0
3776 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
3777 {
3778         /* TODO */
3779 }
3780
3781 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
3782 {
3783         /* TODO */
3784 }
3785 #endif
3786
3787 static int gfx_v12_0_set_powergating_state(void *handle,
3788                                            enum amd_powergating_state state)
3789 {
3790         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3791         bool enable = (state == AMD_PG_STATE_GATE);
3792
3793         if (amdgpu_sriov_vf(adev))
3794                 return 0;
3795
3796         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3797         case IP_VERSION(12, 0, 0):
3798         case IP_VERSION(12, 0, 1):
3799                 amdgpu_gfx_off_ctrl(adev, enable);
3800                 break;
3801         default:
3802                 break;
3803         }
3804
3805         return 0;
3806 }
3807
3808 static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3809                                                        bool enable)
3810 {
3811         uint32_t def, data;
3812
3813         if (!(adev->cg_flags &
3814               (AMD_CG_SUPPORT_GFX_CGCG |
3815               AMD_CG_SUPPORT_GFX_CGLS |
3816               AMD_CG_SUPPORT_GFX_3D_CGCG |
3817               AMD_CG_SUPPORT_GFX_3D_CGLS)))
3818                 return;
3819
3820         if (enable) {
3821                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3822
3823                 /* unset CGCG override */
3824                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3825                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3826                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3827                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3828                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
3829                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3830                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3831
3832                 /* update CGCG override bits */
3833                 if (def != data)
3834                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3835
3836                 /* enable cgcg FSM(0x0000363F) */
3837                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3838
3839                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
3840                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
3841                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3842                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3843                 }
3844
3845                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
3846                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
3847                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3848                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3849                 }
3850
3851                 if (def != data)
3852                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3853
3854                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3855                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3856
3857                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
3858                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
3859                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3860                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3861                 }
3862
3863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
3864                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
3865                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3866                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3867                 }
3868
3869                 if (def != data)
3870                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3871
3872                 /* set IDLE_POLL_COUNT(0x00900100) */
3873                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
3874
3875                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
3876                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3877                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3878
3879                 if (def != data)
3880                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
3881
3882                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
3883                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
3884                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
3885                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
3886                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
3887                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
3888
3889                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3890                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3891                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3892
3893                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
3894                 if (adev->sdma.num_instances > 1) {
3895                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3896                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
3897                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3898                 }
3899         } else {
3900                 /* Program RLC_CGCG_CGLS_CTRL */
3901                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
3902
3903                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3904                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3905
3906                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3907                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3908
3909                 if (def != data)
3910                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
3911
3912                 /* Program RLC_CGCG_CGLS_CTRL_3D */
3913                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
3914
3915                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
3916                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3917                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3918                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3919
3920                 if (def != data)
3921                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
3922
3923                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
3924                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3925                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
3926
3927                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
3928                 if (adev->sdma.num_instances > 1) {
3929                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
3930                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
3931                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
3932                 }
3933         }
3934 }
3935
3936 static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3937                                                        bool enable)
3938 {
3939         uint32_t data, def;
3940         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
3941                 return;
3942
3943         /* MGCG is disabled by HW by default */
3944         if (enable) {
3945                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3946                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3947                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3948
3949                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3950                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3951                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3952
3953                         if (def != data)
3954                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3955                 }
3956         } else {
3957                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3958                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3959
3960                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3961                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3962                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3963
3964                         if (def != data)
3965                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3966                 }
3967         }
3968 }
3969
3970 static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
3971                                            bool enable)
3972 {
3973         uint32_t def, data;
3974
3975         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
3976                 return;
3977
3978         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
3979
3980         if (enable)
3981                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
3982                                   RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
3983         else
3984                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
3985                                 RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
3986
3987         if (def != data)
3988                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
3989 }
3990
3991 static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
3992                                        bool enable)
3993 {
3994         uint32_t def, data;
3995
3996         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
3997                 return;
3998
3999         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4000
4001         if (enable)
4002                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4003         else
4004                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4005
4006         if (def != data)
4007                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4008 }
4009
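/*
 * Apply all supported clock gating features (coarse/medium grain CG,
 * repeater and SRAM FGCG, perf clock gating) while the RLC is in safe
 * mode, and keep the GUI idle interrupt in sync with the CG state.
 */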
4010 static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4011                                             bool enable)
4012 {
4013         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4014
4015         gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
4016
4017         gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
4018
4019         gfx_v12_0_update_repeater_fgcg(adev, enable);
4020
4021         gfx_v12_0_update_sram_fgcg(adev, enable);
4022
4023         gfx_v12_0_update_perf_clk(adev, enable);
4024
4025         if (adev->cg_flags &
4026             (AMD_CG_SUPPORT_GFX_MGCG |
4027              AMD_CG_SUPPORT_GFX_CGLS |
4028              AMD_CG_SUPPORT_GFX_CGCG |
4029              AMD_CG_SUPPORT_GFX_3D_CGCG |
4030              AMD_CG_SUPPORT_GFX_3D_CGLS))
4031                 gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
4032
4033         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4034
4035         return 0;
4036 }
4037
4038 static int gfx_v12_0_set_clockgating_state(void *handle,
4039                                            enum amd_clockgating_state state)
4040 {
4041         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4042
4043         if (amdgpu_sriov_vf(adev))
4044                 return 0;
4045
4046         switch (adev->ip_versions[GC_HWIP][0]) {
4047         case IP_VERSION(12, 0, 0):
4048         case IP_VERSION(12, 0, 1):
4049                 gfx_v12_0_update_gfx_clock_gating(adev,
4050                                                   state == AMD_CG_STATE_GATE);
4051                 break;
4052         default:
4053                 break;
4054         }
4055
4056         return 0;
4057 }
4058
4059 static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags)
4060 {
4061         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4062         int data;
4063
4064         /* AMD_CG_SUPPORT_GFX_MGCG */
4065         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4066         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4067                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4068
4069         /* AMD_CG_SUPPORT_REPEATER_FGCG */
4070         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
4071                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
4072
4073         /* AMD_CG_SUPPORT_GFX_FGCG */
4074         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
4075                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
4076
4077         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
4078         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
4079                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
4080
4081         /* AMD_CG_SUPPORT_GFX_CGCG */
4082         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4083         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4084                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4085
4086         /* AMD_CG_SUPPORT_GFX_CGLS */
4087         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4088                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4089
4090         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4091         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4092         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4093                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4094
4095         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4096         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4097                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4098 }
4099
4100 static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4101 {
4102         /* gfx12 hardware uses a 32-bit rptr */
4103         return *(uint32_t *)ring->rptr_cpu_addr;
4104 }
4105
4106 static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4107 {
4108         struct amdgpu_device *adev = ring->adev;
4109         u64 wptr;
4110
4111         /* XXX check if swapping is necessary on BE */
4112         if (ring->use_doorbell) {
4113                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4114         } else {
4115                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
4116                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
4117         }
4118
4119         return wptr;
4120 }
4121
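/*
 * Commit the gfx ring write pointer.  For MES-managed queues the wptr shadow
 * and the "queue unmapped" flag live right after the MQD; the masked wptr is
 * written to the shadow, and the aggregated doorbell is rung in addition to
 * the ring doorbell when the queue has been unmapped.  Legacy queues use the
 * doorbell, or fall back to the CP_RB0_WPTR registers.
 */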
4122 static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4123 {
4124         struct amdgpu_device *adev = ring->adev;
4125         uint32_t *wptr_saved;
4126         uint32_t *is_queue_unmap;
4127         uint64_t aggregated_db_index;
4128         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
4129         uint64_t wptr_tmp;
4130
4131         if (ring->is_mes_queue) {
4132                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4133                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4134                                               sizeof(uint32_t));
4135                 aggregated_db_index =
4136                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4137                                                                  ring->hw_prio);
4138
4139                 wptr_tmp = ring->wptr & ring->buf_mask;
4140                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4141                 *wptr_saved = wptr_tmp;
4142                 /* assume the doorbell is always used by the MES-mapped queue */
4143                 if (*is_queue_unmap) {
4144                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4145                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4146                 } else {
4147                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4148
4149                         if (*is_queue_unmap)
4150                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4151                 }
4152         } else {
4153                 if (ring->use_doorbell) {
4154                         /* XXX check if swapping is necessary on BE */
4155                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4156                                      ring->wptr);
4157                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4158                 } else {
4159                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
4160                                      lower_32_bits(ring->wptr));
4161                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
4162                                      upper_32_bits(ring->wptr));
4163                 }
4164         }
4165 }
4166
4167 static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4168 {
4169         /* gfx12 hardware uses a 32-bit rptr */
4170         return *(uint32_t *)ring->rptr_cpu_addr;
4171 }
4172
4173 static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4174 {
4175         u64 wptr;
4176
4177         /* XXX check if swapping is necessary on BE */
4178         if (ring->use_doorbell)
4179                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
4180         else
4181                 BUG();
4182         return wptr;
4183 }
4184
4185 static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4186 {
4187         struct amdgpu_device *adev = ring->adev;
4188         uint32_t *wptr_saved;
4189         uint32_t *is_queue_unmap;
4190         uint64_t aggregated_db_index;
4191         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
4192         uint64_t wptr_tmp;
4193
4194         if (ring->is_mes_queue) {
4195                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
4196                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
4197                                               sizeof(uint32_t));
4198                 aggregated_db_index =
4199                         amdgpu_mes_get_aggregated_doorbell_index(adev,
4200                                                                  ring->hw_prio);
4201
4202                 wptr_tmp = ring->wptr & ring->buf_mask;
4203                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
4204                 *wptr_saved = wptr_tmp;
4205                 /* assume the doorbell is always used by the MES-mapped queue */
4206                 if (*is_queue_unmap) {
4207                         WDOORBELL64(aggregated_db_index, wptr_tmp);
4208                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4209                 } else {
4210                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
4211
4212                         if (*is_queue_unmap)
4213                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
4214                 }
4215         } else {
4216                 /* XXX check if swapping is necessary on BE */
4217                 if (ring->use_doorbell) {
4218                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
4219                                      ring->wptr);
4220                         WDOORBELL64(ring->doorbell_index, ring->wptr);
4221                 } else {
4222                         BUG(); /* only DOORBELL method supported on gfx12 now */
4223                 }
4224         }
4225 }
4226
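/*
 * Flush the HDP cache from the CP: pick the NBIO ref/mask bit for this
 * engine (per-pipe cp2/cp6 masks for MEC1/MEC2, cp0 for gfx) and emit a
 * WAIT_REG_MEM on the HDP flush request/done registers.
 */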
4227 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4228 {
4229         struct amdgpu_device *adev = ring->adev;
4230         u32 ref_and_mask, reg_mem_engine;
4231         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4232
4233         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4234                 switch (ring->me) {
4235                 case 1:
4236                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4237                         break;
4238                 case 2:
4239                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4240                         break;
4241                 default:
4242                         return;
4243                 }
4244                 reg_mem_engine = 0;
4245         } else {
4246                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4247                 reg_mem_engine = 1; /* pfp */
4248         }
4249
4250         gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4251                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4252                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4253                                ref_and_mask, ref_and_mask, 0x20);
4254 }
4255
4256 static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4257                                        struct amdgpu_job *job,
4258                                        struct amdgpu_ib *ib,
4259                                        uint32_t flags)
4260 {
4261         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4262         u32 header, control = 0;
4263
4264         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
4265
4266         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4267
4268         control |= ib->length_dw | (vmid << 24);
4269
4270         if (ring->is_mes_queue)
4271                 /* inherit vmid from mqd */
4272                 control |= 0x400000;
4273
4274         amdgpu_ring_write(ring, header);
4275         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4276         amdgpu_ring_write(ring,
4277 #ifdef __BIG_ENDIAN
4278                 (2 << 0) |
4279 #endif
4280                 lower_32_bits(ib->gpu_addr));
4281         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4282         amdgpu_ring_write(ring, control);
4283 }
4284
4285 static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4286                                            struct amdgpu_job *job,
4287                                            struct amdgpu_ib *ib,
4288                                            uint32_t flags)
4289 {
4290         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4291         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4292
4293         if (ring->is_mes_queue)
4294                 /* inherit vmid from mqd */
4295                 control |= 0x40000000;
4296
4297         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4298         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4299         amdgpu_ring_write(ring,
4300 #ifdef __BIG_ENDIAN
4301                                 (2 << 0) |
4302 #endif
4303                                 lower_32_bits(ib->gpu_addr));
4304         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4305         amdgpu_ring_write(ring, control);
4306 }
4307
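/*
 * Emit a fence as a RELEASE_MEM event (CACHE_FLUSH_AND_INV_TS with GL2
 * writeback): the 64-bit, or low 32-bit, sequence value is written to the
 * given address and an interrupt is optionally raised once it lands.
 */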
4308 static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4309                                      u64 seq, unsigned flags)
4310 {
4311         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4312         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4313
4314         /* RELEASE_MEM - flush caches, send int */
4315         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4316         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
4317                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
4318                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
4319                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4320                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
4321         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
4322                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
4323
4324         /*
4325          * the address must be qword aligned for a 64-bit write, and dword
4326          * aligned if only the low 32 bits are written (data high is discarded)
4327          */
4328         if (write64bit)
4329                 BUG_ON(addr & 0x7);
4330         else
4331                 BUG_ON(addr & 0x3);
4332         amdgpu_ring_write(ring, lower_32_bits(addr));
4333         amdgpu_ring_write(ring, upper_32_bits(addr));
4334         amdgpu_ring_write(ring, lower_32_bits(seq));
4335         amdgpu_ring_write(ring, upper_32_bits(seq));
4336         amdgpu_ring_write(ring, ring->is_mes_queue ?
4337                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
4338 }
4339
4340 static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4341 {
4342         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4343         uint32_t seq = ring->fence_drv.sync_seq;
4344         uint64_t addr = ring->fence_drv.gpu_addr;
4345
4346         gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
4347                                upper_32_bits(addr), seq, 0xffffffff, 4);
4348 }
4349
4350 static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
4351                                    uint16_t pasid, uint32_t flush_type,
4352                                    bool all_hub, uint8_t dst_sel)
4353 {
4354         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
4355         amdgpu_ring_write(ring,
4356                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
4357                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
4358                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
4359                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
4360 }
4361
4362 static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4363                                          unsigned vmid, uint64_t pd_addr)
4364 {
4365         if (ring->is_mes_queue)
4366                 gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
4367         else
4368                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4369
4370         /* compute doesn't have PFP */
4371         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4372                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4373                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4374                 amdgpu_ring_write(ring, 0x0);
4375         }
4376 }
4377
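/*
 * KIQ fence: only the low 32 bits of the sequence are written (the seq
 * writeback slot is 32-bit), and the interrupt, if requested, is raised by
 * poking CPC_INT_STATUS via WRITE_DATA.
 */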
4378 static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4379                                           u64 seq, unsigned int flags)
4380 {
4381         struct amdgpu_device *adev = ring->adev;
4382
4383         /* we only allocate 32 bits for each seq writeback address */
4384         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4385
4386         /* write fence seq to the "addr" */
4387         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4388         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4389                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4390         amdgpu_ring_write(ring, lower_32_bits(addr));
4391         amdgpu_ring_write(ring, upper_32_bits(addr));
4392         amdgpu_ring_write(ring, lower_32_bits(seq));
4393
4394         if (flags & AMDGPU_FENCE_FLAG_INT) {
4395                 /* set register to trigger INT */
4396                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4397                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4398                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4399                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
4400                 amdgpu_ring_write(ring, 0);
4401                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4402         }
4403 }
4404
4405 static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
4406                                          uint32_t flags)
4407 {
4408         uint32_t dw2 = 0;
4409
4410         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
4411         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4412                 /* set load_global_config & load_global_uconfig */
4413                 dw2 |= 0x8001;
4414                 /* set load_cs_sh_regs */
4415                 dw2 |= 0x01000000;
4416                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4417                 dw2 |= 0x10002;
4418         }
4419
4420         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4421         amdgpu_ring_write(ring, dw2);
4422         amdgpu_ring_write(ring, 0);
4423 }
4424
4425 static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
4426                                                    uint64_t addr)
4427 {
4428         unsigned ret;
4429
4430         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4431         amdgpu_ring_write(ring, lower_32_bits(addr));
4432         amdgpu_ring_write(ring, upper_32_bits(addr));
4433         /* discard following DWs if *cond_exec_gpu_addr==0 */
4434         amdgpu_ring_write(ring, 0);
4435         ret = ring->wptr & ring->buf_mask;
4436         /* patch dummy value later */
4437         amdgpu_ring_write(ring, 0);
4438
4439         return ret;
4440 }
4441
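/*
 * Preempt the IBs currently on the ring: ask the KIQ to issue an unmap with
 * PREEMPT_QUEUES_NO_UNMAP and a trailing fence, then busy-wait up to
 * adev->usec_timeout microseconds for that fence to signal.  The ring's
 * preemption cond_exec is toggled around the request.
 */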
4442 static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
4443 {
4444         int i, r = 0;
4445         struct amdgpu_device *adev = ring->adev;
4446         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4447         struct amdgpu_ring *kiq_ring = &kiq->ring;
4448         unsigned long flags;
4449
4450         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4451                 return -EINVAL;
4452
4453         spin_lock_irqsave(&kiq->ring_lock, flags);
4454
4455         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
4456                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4457                 return -ENOMEM;
4458         }
4459
4460         /* assert preemption condition */
4461         amdgpu_ring_set_preempt_cond_exec(ring, false);
4462
4463         /* assert IB preemption, emit the trailing fence */
4464         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
4465                                    ring->trail_fence_gpu_addr,
4466                                    ++ring->trail_seq);
4467         amdgpu_ring_commit(kiq_ring);
4468
4469         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4470
4471         /* poll the trailing fence */
4472         for (i = 0; i < adev->usec_timeout; i++) {
4473                 if (ring->trail_seq ==
4474                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
4475                         break;
4476                 udelay(1);
4477         }
4478
4479         if (i >= adev->usec_timeout) {
4480                 r = -EINVAL;
4481                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
4482         }
4483
4484         /* deassert preemption condition */
4485         amdgpu_ring_set_preempt_cond_exec(ring, true);
4486         return r;
4487 }
4488
4489 static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
4490                                            bool start,
4491                                            bool secure)
4492 {
4493         uint32_t v = secure ? FRAME_TMZ : 0;
4494
4495         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4496         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
4497 }
4498
4499 static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
4500                                      uint32_t reg_val_offs)
4501 {
4502         struct amdgpu_device *adev = ring->adev;
4503
4504         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4505         amdgpu_ring_write(ring, 0 |     /* src: register*/
4506                                 (5 << 8) |      /* dst: memory */
4507                                 (1 << 20));     /* write confirm */
4508         amdgpu_ring_write(ring, reg);
4509         amdgpu_ring_write(ring, 0);
4510         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4511                                 reg_val_offs * 4));
4512         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4513                                 reg_val_offs * 4));
4514 }
4515
4516 static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
4517                                      uint32_t reg,
4518                                      uint32_t val)
4519 {
4520         uint32_t cmd = 0;
4521
4522         switch (ring->funcs->type) {
4523         case AMDGPU_RING_TYPE_GFX:
4524                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4525                 break;
4526         case AMDGPU_RING_TYPE_KIQ:
4527                 cmd = (1 << 16); /* no inc addr */
4528                 break;
4529         default:
4530                 cmd = WR_CONFIRM;
4531                 break;
4532         }
4533         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4534         amdgpu_ring_write(ring, cmd);
4535         amdgpu_ring_write(ring, reg);
4536         amdgpu_ring_write(ring, 0);
4537         amdgpu_ring_write(ring, val);
4538 }
4539
4540 static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4541                                         uint32_t val, uint32_t mask)
4542 {
4543         gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4544 }
4545
4546 static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4547                                                    uint32_t reg0, uint32_t reg1,
4548                                                    uint32_t ref, uint32_t mask)
4549 {
4550         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4551
4552         gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4553                                ref, mask, 0x20);
4554 }
4555
4556 static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring,
4557                                          unsigned vmid)
4558 {
4559         struct amdgpu_device *adev = ring->adev;
4560         uint32_t value = 0;
4561
4562         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4563         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4564         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4565         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4566         WREG32_SOC15(GC, 0, regSQ_CMD, value);
4567 }
4568
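/*
 * EOP interrupt control for the gfx ring: only me 0, pipe 0 is handled here,
 * toggling the TIME_STAMP and GENERIC0 interrupt enables in
 * CP_INT_CNTL_RING0.
 */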
4569 static void
4570 gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4571                                       uint32_t me, uint32_t pipe,
4572                                       enum amdgpu_interrupt_state state)
4573 {
4574         uint32_t cp_int_cntl, cp_int_cntl_reg;
4575
4576         if (!me) {
4577                 switch (pipe) {
4578                 case 0:
4579                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
4580                         break;
4581                 default:
4582                         DRM_DEBUG("invalid pipe %d\n", pipe);
4583                         return;
4584                 }
4585         } else {
4586                 DRM_DEBUG("invalid me %d\n", me);
4587                 return;
4588         }
4589
4590         switch (state) {
4591         case AMDGPU_IRQ_STATE_DISABLE:
4592                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4593                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4594                                             TIME_STAMP_INT_ENABLE, 0);
4595                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4596                                             GENERIC0_INT_ENABLE, 0);
4597                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4598                 break;
4599         case AMDGPU_IRQ_STATE_ENABLE:
4600                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
4601                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4602                                             TIME_STAMP_INT_ENABLE, 1);
4603                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4604                                             GENERIC0_INT_ENABLE, 1);
4605                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
4606                 break;
4607         default:
4608                 break;
4609         }
4610 }
4611
4612 static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4613                                                      int me, int pipe,
4614                                                      enum amdgpu_interrupt_state state)
4615 {
4616         u32 mec_int_cntl, mec_int_cntl_reg;
4617
4618         /*
4619          * amdgpu controls only the first MEC. That's why this function only
4620          * handles the setting of interrupts for this specific MEC. All other
4621          * pipes' interrupts are set by amdkfd.
4622          */
4623
4624         if (me == 1) {
4625                 switch (pipe) {
4626                 case 0:
4627                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
4628                         break;
4629                 case 1:
4630                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
4631                         break;
4632                 default:
4633                         DRM_DEBUG("invalid pipe %d\n", pipe);
4634                         return;
4635                 }
4636         } else {
4637                 DRM_DEBUG("invalid me %d\n", me);
4638                 return;
4639         }
4640
4641         switch (state) {
4642         case AMDGPU_IRQ_STATE_DISABLE:
4643                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4644                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4645                                              TIME_STAMP_INT_ENABLE, 0);
4646                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4647                                              GENERIC0_INT_ENABLE, 0);
4648                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4649                 break;
4650         case AMDGPU_IRQ_STATE_ENABLE:
4651                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
4652                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4653                                              TIME_STAMP_INT_ENABLE, 1);
4654                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4655                                              GENERIC0_INT_ENABLE, 1);
4656                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
4657                 break;
4658         default:
4659                 break;
4660         }
4661 }
4662
4663 static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4664                                             struct amdgpu_irq_src *src,
4665                                             unsigned type,
4666                                             enum amdgpu_interrupt_state state)
4667 {
4668         switch (type) {
4669         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4670                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
4671                 break;
4672         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
4673                 gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
4674                 break;
4675         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4676                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4677                 break;
4678         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4679                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4680                 break;
4681         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4682                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4683                 break;
4684         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4685                 gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4686                 break;
4687         default:
4688                 break;
4689         }
4690         return 0;
4691 }
4692
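/*
 * EOP interrupt handler: fences from MES-managed queues are looked up by
 * queue id in the MES IDR, otherwise the me/pipe/queue decoded from the IV
 * ring_id selects the matching gfx or compute ring to process.
 */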
4693 static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
4694                              struct amdgpu_irq_src *source,
4695                              struct amdgpu_iv_entry *entry)
4696 {
4697         int i;
4698         u8 me_id, pipe_id, queue_id;
4699         struct amdgpu_ring *ring;
4700         uint32_t mes_queue_id = entry->src_data[0];
4701
4702         DRM_DEBUG("IH: CP EOP\n");
4703
4704         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
4705                 struct amdgpu_mes_queue *queue;
4706
4707                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
4708
4709                 spin_lock(&adev->mes.queue_id_lock);
4710                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
4711                 if (queue) {
4712                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
4713                         amdgpu_fence_process(queue->ring);
4714                 }
4715                 spin_unlock(&adev->mes.queue_id_lock);
4716         } else {
4717                 me_id = (entry->ring_id & 0x0c) >> 2;
4718                 pipe_id = (entry->ring_id & 0x03) >> 0;
4719                 queue_id = (entry->ring_id & 0x70) >> 4;
4720
4721                 switch (me_id) {
4722                 case 0:
4723                         if (pipe_id == 0)
4724                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4725                         else
4726                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
4727                         break;
4728                 case 1:
4729                 case 2:
4730                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4731                                 ring = &adev->gfx.compute_ring[i];
4732                                 /* Per-queue interrupt is supported for MEC starting from VI.
4733                                  * The interrupt can only be enabled/disabled per pipe instead
4734                                  * of per queue.
4735                                  */
4736                                 if ((ring->me == me_id) &&
4737                                     (ring->pipe == pipe_id) &&
4738                                     (ring->queue == queue_id))
4739                                         amdgpu_fence_process(ring);
4740                         }
4741                         break;
4742                 }
4743         }
4744
4745         return 0;
4746 }
4747
4748 static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4749                                               struct amdgpu_irq_src *source,
4750                                               unsigned type,
4751                                               enum amdgpu_interrupt_state state)
4752 {
4753         switch (state) {
4754         case AMDGPU_IRQ_STATE_DISABLE:
4755         case AMDGPU_IRQ_STATE_ENABLE:
4756                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
4757                                       PRIV_REG_INT_ENABLE,
4758                                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4759                 break;
4760         default:
4761                 break;
4762         }
4763
4764         return 0;
4765 }
4766
4767 static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4768                                                struct amdgpu_irq_src *source,
4769                                                unsigned type,
4770                                                enum amdgpu_interrupt_state state)
4771 {
4772         switch (state) {
4773         case AMDGPU_IRQ_STATE_DISABLE:
4774         case AMDGPU_IRQ_STATE_ENABLE:
4775                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
4776                                PRIV_INSTR_INT_ENABLE,
4777                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4778                 break;
4779         default:
4780                 break;
4781         }
4782
4783         return 0;
4784 }
4785
4786 static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
4787                                         struct amdgpu_iv_entry *entry)
4788 {
4789         u8 me_id, pipe_id, queue_id;
4790         struct amdgpu_ring *ring;
4791         int i;
4792
4793         me_id = (entry->ring_id & 0x0c) >> 2;
4794         pipe_id = (entry->ring_id & 0x03) >> 0;
4795         queue_id = (entry->ring_id & 0x70) >> 4;
4796
4797         switch (me_id) {
4798         case 0:
4799                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4800                         ring = &adev->gfx.gfx_ring[i];
4801                         /* we only enable 1 gfx queue per pipe for now */
4802                         if (ring->me == me_id && ring->pipe == pipe_id)
4803                                 drm_sched_fault(&ring->sched);
4804                 }
4805                 break;
4806         case 1:
4807         case 2:
4808                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4809                         ring = &adev->gfx.compute_ring[i];
4810                         if (ring->me == me_id && ring->pipe == pipe_id &&
4811                             ring->queue == queue_id)
4812                                 drm_sched_fault(&ring->sched);
4813                 }
4814                 break;
4815         default:
4816                 BUG();
4817                 break;
4818         }
4819 }
4820
4821 static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
4822                                   struct amdgpu_irq_src *source,
4823                                   struct amdgpu_iv_entry *entry)
4824 {
4825         DRM_ERROR("Illegal register access in command stream\n");
4826         gfx_v12_0_handle_priv_fault(adev, entry);
4827         return 0;
4828 }
4829
4830 static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
4831                                    struct amdgpu_irq_src *source,
4832                                    struct amdgpu_iv_entry *entry)
4833 {
4834         DRM_ERROR("Illegal instruction in command stream\n");
4835         gfx_v12_0_handle_priv_fault(adev, entry);
4836         return 0;
4837 }
4838
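/*
 * Emit an ACQUIRE_MEM covering the full address range that invalidates the
 * GL1/GLV/GLK/GLI caches and invalidates plus writes back GL2 and GLM.
 */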
4839 static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
4840 {
4841         const unsigned int gcr_cntl =
4842                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
4843                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
4844                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
4845                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
4846                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
4847                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
4848                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
4849                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
4850
4851         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
4852         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
4853         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
4854         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
4855         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
4856         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
4857         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
4858         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
4859         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
4860 }
4861
4862 static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
4863 {
4864         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4865         uint32_t i, j, k, reg, index = 0;
4866         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
4867
4868         if (!adev->gfx.ip_dump_core)
4869                 return;
4870
4871         for (i = 0; i < reg_count; i++)
4872                 drm_printf(p, "%-50s \t 0x%08x\n",
4873                            gc_reg_list_12_0[i].reg_name,
4874                            adev->gfx.ip_dump_core[i]);
4875
4876         /* print compute queue registers for all instances */
4877         if (!adev->gfx.ip_dump_compute_queues)
4878                 return;
4879
4880         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
4881         drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
4882                    adev->gfx.mec.num_mec,
4883                    adev->gfx.mec.num_pipe_per_mec,
4884                    adev->gfx.mec.num_queue_per_pipe);
4885
4886         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4887                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4888                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
4889                                 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
4890                                 for (reg = 0; reg < reg_count; reg++) {
4891                                         drm_printf(p, "%-50s \t 0x%08x\n",
4892                                                    gc_cp_reg_list_12[reg].reg_name,
4893                                                    adev->gfx.ip_dump_compute_queues[index + reg]);
4894                                 }
4895                                 index += reg_count;
4896                         }
4897                 }
4898         }
4899
4900         /* print gfx queue registers for all instances */
4901         if (!adev->gfx.ip_dump_gfx_queues)
4902                 return;
4903
4904         index = 0;
4905         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
4906         drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
4907                    adev->gfx.me.num_me,
4908                    adev->gfx.me.num_pipe_per_me,
4909                    adev->gfx.me.num_queue_per_pipe);
4910
4911         for (i = 0; i < adev->gfx.me.num_me; i++) {
4912                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4913                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
4914                                 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
4915                                 for (reg = 0; reg < reg_count; reg++) {
4916                                         drm_printf(p, "%-50s \t 0x%08x\n",
4917                                                    gc_gfx_queue_reg_list_12[reg].reg_name,
4918                                                    adev->gfx.ip_dump_gfx_queues[index + reg]);
4919                                 }
4920                                 index += reg_count;
4921                         }
4922                 }
4923         }
4924 }
4925
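/*
 * Snapshot the gfx IP registers for a later dump: core registers first, then
 * the per-queue compute and gfx registers, selecting each me/pipe/queue via
 * soc24_grbm_select() under srbm_mutex with GFXOFF temporarily disabled.
 */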
4926 static void gfx_v12_ip_dump(void *handle)
4927 {
4928         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4929         uint32_t i, j, k, reg, index = 0;
4930         uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
4931
4932         if (!adev->gfx.ip_dump_core)
4933                 return;
4934
4935         amdgpu_gfx_off_ctrl(adev, false);
4936         for (i = 0; i < reg_count; i++)
4937                 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
4938         amdgpu_gfx_off_ctrl(adev, true);
4939
4940         /* dump compute queue registers for all instances */
4941         if (!adev->gfx.ip_dump_compute_queues)
4942                 return;
4943
4944         reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
4945         amdgpu_gfx_off_ctrl(adev, false);
4946         mutex_lock(&adev->srbm_mutex);
4947         for (i = 0; i < adev->gfx.mec.num_mec; i++) {
4948                 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
4949                         for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
4950                                 /* ME0 is for GFX, so compute MEs start from 1 */
4951                                 soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
4952                                 for (reg = 0; reg < reg_count; reg++) {
4953                                         adev->gfx.ip_dump_compute_queues[index + reg] =
4954                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
4955                                                         gc_cp_reg_list_12[reg]));
4956                                 }
4957                                 index += reg_count;
4958                         }
4959                 }
4960         }
4961         soc24_grbm_select(adev, 0, 0, 0, 0);
4962         mutex_unlock(&adev->srbm_mutex);
4963         amdgpu_gfx_off_ctrl(adev, true);
4964
4965         /* dump gfx queue registers for all instances */
4966         if (!adev->gfx.ip_dump_gfx_queues)
4967                 return;
4968
4969         index = 0;
4970         reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
4971         amdgpu_gfx_off_ctrl(adev, false);
4972         mutex_lock(&adev->srbm_mutex);
4973         for (i = 0; i < adev->gfx.me.num_me; i++) {
4974                 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
4975                         for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
4976                                 soc24_grbm_select(adev, i, j, k, 0);
4977
4978                                 for (reg = 0; reg < reg_count; reg++) {
4979                                         adev->gfx.ip_dump_gfx_queues[index + reg] =
4980                                                 RREG32(SOC15_REG_ENTRY_OFFSET(
4981                                                         gc_gfx_queue_reg_list_12[reg]));
4982                                 }
4983                                 index += reg_count;
4984                         }
4985                 }
4986         }
4987         soc24_grbm_select(adev, 0, 0, 0, 0);
4988         mutex_unlock(&adev->srbm_mutex);
4989         amdgpu_gfx_off_ctrl(adev, true);
4990 }
4991
4992 static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
4993         .name = "gfx_v12_0",
4994         .early_init = gfx_v12_0_early_init,
4995         .late_init = gfx_v12_0_late_init,
4996         .sw_init = gfx_v12_0_sw_init,
4997         .sw_fini = gfx_v12_0_sw_fini,
4998         .hw_init = gfx_v12_0_hw_init,
4999         .hw_fini = gfx_v12_0_hw_fini,
5000         .suspend = gfx_v12_0_suspend,
5001         .resume = gfx_v12_0_resume,
5002         .is_idle = gfx_v12_0_is_idle,
5003         .wait_for_idle = gfx_v12_0_wait_for_idle,
5004         .set_clockgating_state = gfx_v12_0_set_clockgating_state,
5005         .set_powergating_state = gfx_v12_0_set_powergating_state,
5006         .get_clockgating_state = gfx_v12_0_get_clockgating_state,
5007         .dump_ip_state = gfx_v12_ip_dump,
5008         .print_ip_state = gfx_v12_ip_print,
5009 };
5010
5011 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
5012         .type = AMDGPU_RING_TYPE_GFX,
5013         .align_mask = 0xff,
5014         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5015         .support_64bit_ptrs = true,
5016         .secure_submission_supported = true,
5017         .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
5018         .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
5019         .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
5020         .emit_frame_size = /* 242 dwords maximum in total with 16 IBs */
5021                 5 + /* COND_EXEC */
5022                 7 + /* PIPELINE_SYNC */
5023                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5024                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5025                 2 + /* VM_FLUSH */
5026                 8 + /* FENCE for VM_FLUSH */
5027                 5 + /* COND_EXEC */
5028                 7 + /* HDP_flush */
5029                 4 + /* VGT_flush */
5030                 31 + /* DE_META */
5031                 3 + /* CNTX_CTRL */
5032                 5 + /* HDP_INVL */
5033                 8 + 8 + /* FENCE x2 */
5034                 8, /* gfx_v12_0_emit_mem_sync */
5035         .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
5036         .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
5037         .emit_fence = gfx_v12_0_ring_emit_fence,
5038         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5039         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5040         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5041         .test_ring = gfx_v12_0_ring_test_ring,
5042         .test_ib = gfx_v12_0_ring_test_ib,
5043         .insert_nop = amdgpu_ring_insert_nop,
5044         .pad_ib = amdgpu_ring_generic_pad_ib,
5045         .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
5046         .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
5047         .preempt_ib = gfx_v12_0_ring_preempt_ib,
5048         .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
5049         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5050         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5051         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5052         .soft_recovery = gfx_v12_0_ring_soft_recovery,
5053         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5054 };
5055
5056 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
5057         .type = AMDGPU_RING_TYPE_COMPUTE,
5058         .align_mask = 0xff,
5059         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5060         .support_64bit_ptrs = true,
5061         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5062         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5063         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5064         .emit_frame_size =
5065                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5066                 5 + /* hdp invalidate */
5067                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5068                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5069                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5070                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5071                 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
5072                 8, /* gfx_v12_0_emit_mem_sync */
5073         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5074         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5075         .emit_fence = gfx_v12_0_ring_emit_fence,
5076         .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
5077         .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
5078         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
5079         .test_ring = gfx_v12_0_ring_test_ring,
5080         .test_ib = gfx_v12_0_ring_test_ib,
5081         .insert_nop = amdgpu_ring_insert_nop,
5082         .pad_ib = amdgpu_ring_generic_pad_ib,
5083         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5084         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5085         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5086         .emit_mem_sync = gfx_v12_0_emit_mem_sync,
5087 };
5088
5089 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
5090         .type = AMDGPU_RING_TYPE_KIQ,
5091         .align_mask = 0xff,
5092         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5093         .support_64bit_ptrs = true,
5094         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
5095         .get_wptr = gfx_v12_0_ring_get_wptr_compute,
5096         .set_wptr = gfx_v12_0_ring_set_wptr_compute,
5097         .emit_frame_size =
5098                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
5099                 5 + /* hdp invalidate */
5100                 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
5101                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5102                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5103                 2 + /* gfx_v12_0_ring_emit_vm_flush */
5104                 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5105         .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
5106         .emit_ib = gfx_v12_0_ring_emit_ib_compute,
5107         .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
5108         .test_ring = gfx_v12_0_ring_test_ring,
5109         .test_ib = gfx_v12_0_ring_test_ib,
5110         .insert_nop = amdgpu_ring_insert_nop,
5111         .pad_ib = amdgpu_ring_generic_pad_ib,
5112         .emit_rreg = gfx_v12_0_ring_emit_rreg,
5113         .emit_wreg = gfx_v12_0_ring_emit_wreg,
5114         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
5115         .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
5116 };
5117
5118 static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
5119 {
5120         int i;
5121
5122         adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
5123
5124         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5125                 adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
5126
5127         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5128                 adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
5129 }
5130
5131 static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
5132         .set = gfx_v12_0_set_eop_interrupt_state,
5133         .process = gfx_v12_0_eop_irq,
5134 };
5135
5136 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
5137         .set = gfx_v12_0_set_priv_reg_fault_state,
5138         .process = gfx_v12_0_priv_reg_irq,
5139 };
5140
5141 static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
5142         .set = gfx_v12_0_set_priv_inst_fault_state,
5143         .process = gfx_v12_0_priv_inst_irq,
5144 };
5145
5146 static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
5147 {
5148         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5149         adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
5150
5151         adev->gfx.priv_reg_irq.num_types = 1;
5152         adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
5153
5154         adev->gfx.priv_inst_irq.num_types = 1;
5155         adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
5156 }
5157
5158 static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
5159 {
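        /* The IMU runs in mission mode on APUs; dGPUs use debug mode. */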
5160         if (adev->flags & AMD_IS_APU)
5161                 adev->gfx.imu.mode = MISSION_MODE;
5162         else
5163                 adev->gfx.imu.mode = DEBUG_MODE;
5164
5165         adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
5166 }
5167
5168 static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
5169 {
5170         adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
5171 }
5172
5173 static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
5174 {
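        /*
         * MQDs (memory queue descriptors) hold per-queue hardware state;
         * the sizes and init callbacks registered here are consumed by the
         * common amdgpu queue setup code for gfx and compute queues.
         */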
5175         /* set gfx eng mqd */
5176         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
5177                 sizeof(struct v12_gfx_mqd);
5178         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
5179                 gfx_v12_0_gfx_mqd_init;
5180         /* set compute eng mqd */
5181         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
5182                 sizeof(struct v12_compute_mqd);
5183         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
5184                 gfx_v12_0_compute_mqd_init;
5185 }
5186
5187 static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
5188                                                           u32 bitmap)
5189 {
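        /*
         * Write a user-requested inactive-WGP mask for the currently
         * selected shader array into GC_USER_SHADER_ARRAY_CONFIG; a zero
         * bitmap leaves the register untouched.
         */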
5190         u32 data;
5191
5192         if (!bitmap)
5193                 return;
5194
5195         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5196         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5197
5198         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
5199 }
5200
5201 static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
5202 {
5203         u32 data, wgp_bitmask;
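
        /*
         * Inactive WGPs are the union of the fuse mask
         * (CC_GC_SHADER_ARRAY_CONFIG) and the user override
         * (GC_USER_SHADER_ARRAY_CONFIG); inverting against a bitmask of
         * max_cu_per_sh / 2 WGPs (two CUs per WGP) yields the active-WGP
         * bitmap for the selected SH.
         */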
5204         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
5205         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
5206
5207         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5208         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5209
5210         wgp_bitmask =
5211                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
5212
5213         return (~data) & wgp_bitmask;
5214 }
5215
5216 static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
5217 {
5218         u32 wgp_idx, wgp_active_bitmap;
5219         u32 cu_bitmap_per_wgp, cu_active_bitmap;
5220
5221         wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
5222         cu_active_bitmap = 0;
5223
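        /*
         * Expand the WGP-granular bitmap to CU granularity: WGP n maps to
         * CU bits 2n and 2n + 1.
         */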
5224         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
5226                 /* each enabled WGP contributes 2 enabled CUs */
5226                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
5227                 if (wgp_active_bitmap & (1 << wgp_idx))
5228                         cu_active_bitmap |= cu_bitmap_per_wgp;
5229         }
5230
5231         return cu_active_bitmap;
5232 }
5233
5234 static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
5235                                  struct amdgpu_cu_info *cu_info)
5236 {
5237         int i, j, k, counter, active_cu_number = 0;
5238         u32 mask, bitmap;
5239         unsigned int disable_masks[8 * 2];
5240
5241         if (!adev || !cu_info)
5242                 return -EINVAL;
5243
5244         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
5245
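        /*
         * Walk every SE/SH pair: select it through GRBM, apply any user WGP
         * disable mask, then read back and accumulate the active CU bitmap.
         */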
5246         mutex_lock(&adev->grbm_idx_mutex);
5247         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5248                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5249                         bitmap = i * adev->gfx.config.max_sh_per_se + j;
5250                         if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
5251                                 continue;
5252                         mask = 1;
5253                         counter = 0;
5254                         gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
5255                         if (i < 8 && j < 2)
5256                                 gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
5257                                         adev, disable_masks[i * 2 + j]);
5258                         bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
5259
5260                         /*
5261                          * GFX12 could support more than 4 SEs, while the bitmap
5262                          * in the cu_info struct is 4x4 and the ioctl interface
5263                          * struct drm_amdgpu_info_device must stay stable.
5264                          * So the last two columns of the bitmap store the CU masks
5265                          * for SEs 4 to 7; the layout of the bitmap is as follows:
5266                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
5267                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
5268                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
5269                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
5270                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
5271                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
5272                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
5273                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
5274                          */
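                        /*
                         * Illustrative example (not part of the original comment):
                         * for SE5/SH1, i = 5 and j = 1, so the store below goes to
                         * bitmap[0][5 % 4][1 + (5 / 4) * 2] = bitmap[0][1][3],
                         * which is the SE5/SH1 slot in the table above.
                         */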
5275                         cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
5276
5277                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5278                                 if (bitmap & mask)
5279                                         counter++;
5280
5281                                 mask <<= 1;
5282                         }
5283                         active_cu_number += counter;
5284                 }
5285         }
5286         gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5287         mutex_unlock(&adev->grbm_idx_mutex);
5288
5289         cu_info->number = active_cu_number;
5290         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5291
5292         return 0;
5293 }
5294
5295 const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
5296         .type = AMD_IP_BLOCK_TYPE_GFX,
5297         .major = 12,
5298         .minor = 0,
5299         .rev = 0,
5300         .funcs = &gfx_v12_0_ip_funcs,
5301 };